xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cf019ec6f319a509de7602f53cdadf853cfc8a83)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to using inodes when
22    enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
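
/*
   A minimal usage sketch (illustrative, not part of the library source): the communicator comm and
   the preallocation counts are assumptions.  It shows the recommended pattern of calling both
   preallocation routines so the same code works whether the communicator has one process or many.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used on a multi-process communicator
*/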
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
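
/*
   A brief sketch: because the type is chosen inside MatSetFromOptions(), an existing AIJ code can
   be switched to AIJCRL at run time without source changes (the executable name ./ex1 below is
   hypothetical):

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     mpiexec -n 4 ./ex1 -mat_type aijcrl
*/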
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
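/*
    A calling sketch (the names gmat, mlocal, and dist are illustrative): gmat is a square
    MATSEQAIJ whose entries are taken from rank 0 and mlocal is the number of rows this rank
    is to own.

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
         ... later, when the values of gmat change but its nonzero pattern does not ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/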
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal nonzeros in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal nonzeros in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it, it is not scalable (each processor
403 has an order N integer array, but access is fast).
404 */
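/*
  A lookup sketch (gcol and lcol are illustrative names): given a global column gcol that appears
  in the off-diagonal part, the two storage variants are queried as

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
    lcol--;                               the table stores (local index + 1)
  #else
    lcol = aij->colmap[gcol] - 1;         the array stores (local index + 1), so 0 means not present
  #endif
*/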
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
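/*
   The two macros below insert a single (row,col,value) entry into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search narrows the range, a linear scan locates
   the column, the value is added or overwritten if the location already exists, and otherwise a
   new nonzero is created (reallocating the row with MatSeqXAIJReallocateAIJ() and shifting the
   later entries up) unless the new-nonzero options forbid it.
*/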
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
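/*
   Overwrites the stored values of one locally owned (global) row. As the copies below indicate,
   v must contain the values for every stored nonzero of that row, ordered by increasing global
   column; only existing locations are written, no new nonzeros are created.
*/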
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
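/*
   Entries in locally owned rows are inserted directly into the diagonal (A) or off-diagonal (B)
   sequential blocks through the macros above; a column not yet present in the off-diagonal block
   of an already assembled matrix triggers MatDisAssemble_MPIAIJ() (or the entry is skipped or
   flagged as an error, depending on the new-nonzero options); entries for rows owned by other
   processes are placed in the stash and communicated during assembly.
*/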
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) {
608               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
609                 ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
610               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
611             }
612           } else col = in[j];
613           nonew = b->nonew;
614           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
615         }
616       }
617     } else {
618       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
619       if (!aij->donotstash) {
620         mat->assembled = PETSC_FALSE;
621         if (roworiented) {
622           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
623         } else {
624           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
625         }
626       }
627     }
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
637   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
638 
639   PetscFunctionBegin;
640   for (i=0; i<m; i++) {
641     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
642     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
643     if (idxm[i] >= rstart && idxm[i] < rend) {
644       row = idxm[i] - rstart;
645       for (j=0; j<n; j++) {
646         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
647         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
648         if (idxn[j] >= cstart && idxn[j] < cend) {
649           col  = idxn[j] - cstart;
650           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
651         } else {
652           if (!aij->colmap) {
653             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
654           }
655 #if defined(PETSC_USE_CTABLE)
656           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
657           col--;
658 #else
659           col = aij->colmap[idxn[j]] - 1;
660 #endif
661           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
662           else {
663             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
664           }
665         }
666       }
667     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
668   }
669   PetscFunctionReturn(0);
670 }
671 
672 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
673 
674 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
675 {
676   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
677   PetscErrorCode ierr;
678   PetscInt       nstash,reallocs;
679 
680   PetscFunctionBegin;
681   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
682 
683   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
684   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
685   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
690 {
691   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
692   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
693   PetscErrorCode ierr;
694   PetscMPIInt    n;
695   PetscInt       i,j,rstart,ncols,flg;
696   PetscInt       *row,*col;
697   PetscBool      other_disassembled;
698   PetscScalar    *val;
699 
700   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
701 
702   PetscFunctionBegin;
703   if (!aij->donotstash && !mat->nooffprocentries) {
704     while (1) {
705       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
706       if (!flg) break;
707 
708       for (i=0; i<n; ) {
709         /* Now identify the consecutive vals belonging to the same row */
710         for (j=i,rstart=row[j]; j<n; j++) {
711           if (row[j] != rstart) break;
712         }
713         if (j < n) ncols = j-i;
714         else       ncols = n-i;
715         /* Now assemble all these values with a single function call */
716         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
717 
718         i = j;
719       }
720     }
721     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
722   }
723   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
724   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
725 
726   /* determine if any processor has disassembled; if so, we must
727      also disassemble ourselves, in order that we may reassemble. */
728   /*
729      if the nonzero structure of submatrix B cannot change, then we know that
730      no processor disassembled, thus we can skip this stuff
731   */
732   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
733     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734     if (mat->was_assembled && !other_disassembled) {
735       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
736     }
737   }
738   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
739     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
740   }
741   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
742   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
743   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
744 
745   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
746 
747   aij->rowvalues = 0;
748 
749   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
750   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
751 
752   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
753   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
754     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
755     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
761 {
762   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764 
765   PetscFunctionBegin;
766   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
767   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
772 {
773   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
774   PetscInt      *lrows;
775   PetscInt       r, len;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* get locally owned rows */
780   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
781   /* fix right hand side if needed */
782   if (x && b) {
783     const PetscScalar *xx;
784     PetscScalar       *bb;
785 
786     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
788     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
789     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
790     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
791   }
792   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
793   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
794   if (A->congruentlayouts == -1) { /* first time we compare the row and column layouts */
795     PetscBool cong;
796     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
797     if (cong) A->congruentlayouts = 1;
798     else      A->congruentlayouts = 0;
799   }
800   if ((diag != 0.0) && A->congruentlayouts) {
801     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
802   } else if (diag != 0.0) {
803     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
805     for (r = 0; r < len; ++r) {
806       const PetscInt row = lrows[r] + A->rmap->rstart;
807       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
808     }
809     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
810     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811   } else {
812     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(lrows);CHKERRQ(ierr);
815 
816   /* only change matrix nonzero state if pattern was allowed to be changed */
817   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
818     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
819     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
825 {
826   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode    ierr;
828   PetscMPIInt       n = A->rmap->n;
829   PetscInt          i,j,r,m,p = 0,len = 0;
830   PetscInt          *lrows,*owners = A->rmap->range;
831   PetscSFNode       *rrows;
832   PetscSF           sf;
833   const PetscScalar *xx;
834   PetscScalar       *bb,*mask;
835   Vec               xmask,lmask;
836   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
837   const PetscInt    *aj, *ii,*ridx;
838   PetscScalar       *aa;
839 
840   PetscFunctionBegin;
841   /* Create SF where leaves are input rows and roots are owned rows */
842   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
843   for (r = 0; r < n; ++r) lrows[r] = -1;
844   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
845   for (r = 0; r < N; ++r) {
846     const PetscInt idx   = rows[r];
847     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
848     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
849       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
850     }
851     rrows[r].rank  = p;
852     rrows[r].index = rows[r] - owners[p];
853   }
854   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
855   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
856   /* Collect flags for rows to be zeroed */
857   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
858   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
859   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
860   /* Compress and put in row numbers */
861   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
862   /* zero diagonal part of matrix */
863   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
864   /* handle off diagonal part of matrix */
865   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
866   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
867   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
868   for (i=0; i<len; i++) bb[lrows[i]] = 1;
869   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
870   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
873   if (x) {
874     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
877     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
878   }
879   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
880   /* remove zeroed rows of off diagonal matrix */
881   ii = aij->i;
882   for (i=0; i<len; i++) {
883     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
884   }
885   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
886   if (aij->compressedrow.use) {
887     m    = aij->compressedrow.nrows;
888     ii   = aij->compressedrow.i;
889     ridx = aij->compressedrow.rindex;
890     for (i=0; i<m; i++) {
891       n  = ii[i+1] - ii[i];
892       aj = aij->j + ii[i];
893       aa = aij->a + ii[i];
894 
895       for (j=0; j<n; j++) {
896         if (PetscAbsScalar(mask[*aj])) {
897           if (b) bb[*ridx] -= *aa*xx[*aj];
898           *aa = 0.0;
899         }
900         aa++;
901         aj++;
902       }
903       ridx++;
904     }
905   } else { /* do not use compressed row format */
906     m = l->B->rmap->n;
907     for (i=0; i<m; i++) {
908       n  = ii[i+1] - ii[i];
909       aj = aij->j + ii[i];
910       aa = aij->a + ii[i];
911       for (j=0; j<n; j++) {
912         if (PetscAbsScalar(mask[*aj])) {
913           if (b) bb[i] -= *aa*xx[*aj];
914           *aa = 0.0;
915         }
916         aa++;
917         aj++;
918       }
919     }
920   }
921   if (x) {
922     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
923     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
924   }
925   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
926   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928 
929   /* only change matrix nonzero state if pattern was allowed to be changed */
930   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
931     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
932     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
933   }
934   PetscFunctionReturn(0);
935 }
936 
937 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
938 {
939   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
940   PetscErrorCode ierr;
941   PetscInt       nt;
942   VecScatter     Mvctx = a->Mvctx;
943 
944   PetscFunctionBegin;
945   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
946   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
947 
948   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
949   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
950   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscErrorCode ierr;
969   VecScatter     Mvctx = a->Mvctx;
970 
971   PetscFunctionBegin;
972   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
973   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
975   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
981 {
982   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
983   PetscErrorCode ierr;
984   PetscBool      merged;
985 
986   PetscFunctionBegin;
987   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
988   /* do nondiagonal part */
989   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
990   if (!merged) {
991     /* send it on its way */
992     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993     /* do local part */
994     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
995     /* receive remote parts: note this assumes the values are not actually */
996     /* added in yy until the next line */
997     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
998   } else {
999     /* do local part */
1000     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1001     /* send it on its way */
1002     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1003     /* values actually were received in the Begin() but we need to call this nop */
1004     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1010 {
1011   MPI_Comm       comm;
1012   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1013   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1014   IS             Me,Notme;
1015   PetscErrorCode ierr;
1016   PetscInt       M,N,first,last,*notme,i;
1017   PetscMPIInt    size;
1018 
1019   PetscFunctionBegin;
1020   /* Easy test: the diagonal blocks must be transposes of each other */
1021   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1022   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1023   if (!*f) PetscFunctionReturn(0);
1024   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1026   if (size == 1) PetscFunctionReturn(0);
1027 
1028   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1029   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1030   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1031   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1032   for (i=0; i<first; i++) notme[i] = i;
1033   for (i=last; i<M; i++) notme[i-last+first] = i;
1034   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1035   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1036   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1037   Aoff = Aoffs[0];
1038   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1039   Boff = Boffs[0];
1040   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1041   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1042   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1043   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1044   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1045   ierr = PetscFree(notme);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1050 {
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1059 {
1060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1061   PetscErrorCode ierr;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1066   /* send it on its way */
1067   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   /* do local part */
1069   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1070   /* receive remote parts */
1071   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 /*
1076   This only works correctly for square matrices where the subblock A->A is the
1077    diagonal block
1078 */
1079 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1080 {
1081   PetscErrorCode ierr;
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083 
1084   PetscFunctionBegin;
1085   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1086   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1087   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1103 {
1104   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108 #if defined(PETSC_USE_LOG)
1109   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1110 #endif
1111   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1112   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1113   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1114   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1115 #if defined(PETSC_USE_CTABLE)
1116   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1117 #else
1118   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1119 #endif
1120   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1121   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1122   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1123   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1124   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1125   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1126   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1127 
1128   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1137 #if defined(PETSC_HAVE_ELEMENTAL)
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1139 #endif
1140 #if defined(PETSC_HAVE_HYPRE)
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1143 #endif
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1148 {
1149   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1150   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1151   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1152   PetscErrorCode ierr;
1153   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1154   int            fd;
1155   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1156   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1157   PetscScalar    *column_values;
1158   PetscInt       message_count,flowcontrolcount;
1159   FILE           *file;
1160 
1161   PetscFunctionBegin;
1162   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1163   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1164   nz   = A->nz + B->nz;
1165   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1166   if (!rank) {
1167     header[0] = MAT_FILE_CLASSID;
1168     header[1] = mat->rmap->N;
1169     header[2] = mat->cmap->N;
1170 
1171     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     /* get largest number of rows any processor has */
1174     rlen  = mat->rmap->n;
1175     range = mat->rmap->range;
1176     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1177   } else {
1178     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     rlen = mat->rmap->n;
1180   }
1181 
1182   /* load up the local row counts */
1183   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1184   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1185 
1186   /* store the row lengths to the file */
1187   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1188   if (!rank) {
1189     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1190     for (i=1; i<size; i++) {
1191       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1192       rlen = range[i+1] - range[i];
1193       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1194       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1195     }
1196     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1197   } else {
1198     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1199     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1200     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1201   }
1202   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1203 
1204   /* load up the local column indices */
1205   nzmax = nz; /* process 0 needs as much space as the largest process needs */
1206   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1208   cnt   = 0;
1209   for (i=0; i<mat->rmap->n; i++) {
1210     for (j=B->i[i]; j<B->i[i+1]; j++) {
1211       if ((col = garray[B->j[j]]) > cstart) break;
1212       column_indices[cnt++] = col;
1213     }
1214     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1215     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1216   }
1217   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1218 
1219   /* store the column indices to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     MPI_Status status;
1223     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1224     for (i=1; i<size; i++) {
1225       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1226       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1228       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1229       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1230     }
1231     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1232   } else {
1233     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1234     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1237   }
1238   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1239 
1240   /* load up the local column values */
1241   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1242   cnt  = 0;
1243   for (i=0; i<mat->rmap->n; i++) {
1244     for (j=B->i[i]; j<B->i[i+1]; j++) {
1245       if (garray[B->j[j]] > cstart) break;
1246       column_values[cnt++] = B->a[j];
1247     }
1248     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1249     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1250   }
1251   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1252 
1253   /* store the column values to the file */
1254   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1255   if (!rank) {
1256     MPI_Status status;
1257     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1258     for (i=1; i<size; i++) {
1259       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1260       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1261       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1262       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1263       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1264     }
1265     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1266   } else {
1267     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1268     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1269     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1271   }
1272   ierr = PetscFree(column_values);CHKERRQ(ierr);
1273 
1274   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1275   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #include <petscdraw.h>
1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1281 {
1282   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1283   PetscErrorCode    ierr;
1284   PetscMPIInt       rank = aij->rank,size = aij->size;
1285   PetscBool         isdraw,iascii,isbinary;
1286   PetscViewer       sviewer;
1287   PetscViewerFormat format;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1291   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1293   if (iascii) {
1294     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1295     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1296       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1297       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1298       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1299       for (i=0; i<(PetscInt)size; i++) {
1300         nmax = PetscMax(nmax,nz[i]);
1301         nmin = PetscMin(nmin,nz[i]);
1302         navg += nz[i];
1303       }
1304       ierr = PetscFree(nz);CHKERRQ(ierr);
1305       navg = navg/size;
1306       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1307       PetscFunctionReturn(0);
1308     }
1309     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1310     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1311       MatInfo   info;
1312       PetscInt  *inodes = NULL;
1313 
1314       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1315       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1317       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1318       if (!inodes) {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1321       } else {
1322         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1323                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1324       }
1325       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1327       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1329       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1331       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1332       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1333       PetscFunctionReturn(0);
1334     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1335       PetscInt inodecount,inodelimit,*inodes;
1336       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1337       if (inodes) {
1338         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1339       } else {
1340         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1341       }
1342       PetscFunctionReturn(0);
1343     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1344       PetscFunctionReturn(0);
1345     }
1346   } else if (isbinary) {
1347     if (size == 1) {
1348       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1349       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1350     } else {
1351       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1352     }
1353     PetscFunctionReturn(0);
1354   } else if (isdraw) {
1355     PetscDraw draw;
1356     PetscBool isnull;
1357     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1358     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1359     if (isnull) PetscFunctionReturn(0);
1360   }
1361 
1362   {
1363     /* assemble the entire matrix onto first processor. */
1364     Mat        A;
1365     Mat_SeqAIJ *Aloc;
1366     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1367     MatScalar  *a;
1368 
1369     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1370     if (!rank) {
1371       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1372     } else {
1373       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1374     }
1375     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1376     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1377     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1378     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1379     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1380 
1381     /* copy over the A part */
1382     Aloc = (Mat_SeqAIJ*)aij->A->data;
1383     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1384     row  = mat->rmap->rstart;
1385     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1386     for (i=0; i<m; i++) {
1387       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1388       row++;
1389       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1390     }
1391     aj = Aloc->j;
1392     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1393 
1394     /* copy over the B part */
1395     Aloc = (Mat_SeqAIJ*)aij->B->data;
1396     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1397     row  = mat->rmap->rstart;
1398     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1399     ct   = cols;
1400     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1401     for (i=0; i<m; i++) {
1402       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1403       row++;
1404       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1405     }
1406     ierr = PetscFree(ct);CHKERRQ(ierr);
1407     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409     /*
1410        Everyone has to call to draw the matrix since the graphics waits are
1411        synchronized across all processors that share the PetscDraw object
1412     */
1413     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1414     if (!rank) {
1415       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1416       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1417     }
1418     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1419     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1420     ierr = MatDestroy(&A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1426 {
1427   PetscErrorCode ierr;
1428   PetscBool      iascii,isdraw,issocket,isbinary;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1435   if (iascii || isdraw || isbinary || issocket) {
1436     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1442 {
1443   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1444   PetscErrorCode ierr;
1445   Vec            bb1 = 0;
1446   PetscBool      hasop;
1447 
1448   PetscFunctionBegin;
1449   if (flag == SOR_APPLY_UPPER) {
1450     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451     PetscFunctionReturn(0);
1452   }
1453 
1454   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1455     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1456   }
1457 
1458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463 
1464     while (its--) {
1465       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467 
1468       /* update rhs: bb1 = bb - B*x */
1469       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1470       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1471 
1472       /* local sweep */
1473       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1474     }
1475   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1476     if (flag & SOR_ZERO_INITIAL_GUESS) {
1477       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1478       its--;
1479     }
1480     while (its--) {
1481       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483 
1484       /* update rhs: bb1 = bb - B*x */
1485       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1487 
1488       /* local sweep */
1489       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1490     }
1491   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1492     if (flag & SOR_ZERO_INITIAL_GUESS) {
1493       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1494       its--;
1495     }
1496     while (its--) {
1497       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499 
1500       /* update rhs: bb1 = bb - B*x */
1501       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1502       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1503 
1504       /* local sweep */
1505       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1506     }
1507   } else if (flag & SOR_EISENSTAT) {
1508     Vec xx1;
1509 
1510     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1512 
1513     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515     if (!mat->diag) {
1516       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1517       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1518     }
1519     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1520     if (hasop) {
1521       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1522     } else {
1523       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1524     }
1525     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1526 
1527     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1528 
1529     /* local sweep */
1530     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1531     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1532     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1533   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1534 
1535   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1536 
1537   matin->factorerrortype = mat->A->factorerrortype;
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1542 {
1543   Mat            aA,aB,Aperm;
1544   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1545   PetscScalar    *aa,*ba;
1546   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1547   PetscSF        rowsf,sf;
1548   IS             parcolp = NULL;
1549   PetscBool      done;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
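  /* Outline: PetscSF reductions invert the row and column permutations so that each process learns the
     destination global index of its rows, columns, and ghost columns; per-row diagonal and off-diagonal
     counts are broadcast back for preallocation, and the local values are finally inserted into the
     permuted matrix with MatSetValues(). */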
1553   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1554   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1555   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1556   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1557 
1558   /* Invert row permutation to find out where my rows should go */
1559   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1560   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1561   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1562   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1563   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1565 
1566   /* Invert column permutation to find out where my columns should go */
1567   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1568   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1569   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1570   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1571   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1573   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1574 
1575   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1576   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1577   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1578 
1579   /* Find out where my gcols should go */
1580   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1581   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1588 
1589   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1590   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1591   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt row = rdest[i],rowner;
1594     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1595     for (j=ai[i]; j<ai[i+1]; j++) {
1596       PetscInt cowner,col = cdest[aj[j]];
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601     for (j=bi[i]; j<bi[i+1]; j++) {
1602       PetscInt cowner,col = gcdest[bj[j]];
1603       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1604       if (rowner == cowner) dnnz[i]++;
1605       else onnz[i]++;
1606     }
1607   }
1608   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1612   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1613 
1614   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1615   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1616   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1617   for (i=0; i<m; i++) {
1618     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1619     PetscInt j0,rowlen;
1620     rowlen = ai[i+1] - ai[i];
1621     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625     rowlen = bi[i+1] - bi[i];
1626     for (j0=j=0; j<rowlen; j0=j) {
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1633   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1634   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1635   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1637   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1638   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1639   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1640   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1641   *B = Aperm;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1646 {
1647   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1652   if (ghosts) *ghosts = aij->garray;
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1657 {
1658   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1659   Mat            A    = mat->A,B = mat->B;
1660   PetscErrorCode ierr;
1661   PetscReal      isend[5],irecv[5];
1662 
1663   PetscFunctionBegin;
1664   info->block_size = 1.0;
1665   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1666 
1667   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1668   isend[3] = info->memory;  isend[4] = info->mallocs;
1669 
1670   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1671 
1672   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1673   isend[3] += info->memory;  isend[4] += info->mallocs;
1674   if (flag == MAT_LOCAL) {
1675     info->nz_used      = isend[0];
1676     info->nz_allocated = isend[1];
1677     info->nz_unneeded  = isend[2];
1678     info->memory       = isend[3];
1679     info->mallocs      = isend[4];
1680   } else if (flag == MAT_GLOBAL_MAX) {
1681     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1682 
1683     info->nz_used      = irecv[0];
1684     info->nz_allocated = irecv[1];
1685     info->nz_unneeded  = irecv[2];
1686     info->memory       = irecv[3];
1687     info->mallocs      = irecv[4];
1688   } else if (flag == MAT_GLOBAL_SUM) {
1689     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1690 
1691     info->nz_used      = irecv[0];
1692     info->nz_allocated = irecv[1];
1693     info->nz_unneeded  = irecv[2];
1694     info->memory       = irecv[3];
1695     info->mallocs      = irecv[4];
1696   }
1697   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1698   info->fill_ratio_needed = 0;
1699   info->factor_mallocs    = 0;
1700   PetscFunctionReturn(0);
1701 }
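/* Illustrative caller-side sketch for the routine above (not part of this file): summing the nonzero
   counts over all ranks of a parallel AIJ matrix,
     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     nz   = (PetscInt)info.nz_used;
   MAT_LOCAL returns only this rank's counts and MAT_GLOBAL_MAX the per-rank maximum. */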
1702 
1703 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1704 {
1705   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   switch (op) {
1710   case MAT_NEW_NONZERO_LOCATIONS:
1711   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1712   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1713   case MAT_KEEP_NONZERO_PATTERN:
1714   case MAT_NEW_NONZERO_LOCATION_ERR:
1715   case MAT_USE_INODES:
1716   case MAT_IGNORE_ZERO_ENTRIES:
1717     MatCheckPreallocated(A,1);
1718     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1719     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1720     break;
1721   case MAT_ROW_ORIENTED:
1722     MatCheckPreallocated(A,1);
1723     a->roworiented = flg;
1724 
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1727     break;
1728   case MAT_NEW_DIAGONALS:
1729     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1730     break;
1731   case MAT_IGNORE_OFF_PROC_ENTRIES:
1732     a->donotstash = flg;
1733     break;
1734   case MAT_SPD:
1735     A->spd_set = PETSC_TRUE;
1736     A->spd     = flg;
1737     if (flg) {
1738       A->symmetric                  = PETSC_TRUE;
1739       A->structurally_symmetric     = PETSC_TRUE;
1740       A->symmetric_set              = PETSC_TRUE;
1741       A->structurally_symmetric_set = PETSC_TRUE;
1742     }
1743     break;
1744   case MAT_SYMMETRIC:
1745     MatCheckPreallocated(A,1);
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_STRUCTURALLY_SYMMETRIC:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_HERMITIAN:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SYMMETRY_ETERNAL:
1757     MatCheckPreallocated(A,1);
1758     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1759     break;
1760   case MAT_SUBMAT_SINGLEIS:
1761     A->submat_singleis = flg;
1762     break;
1763   case MAT_STRUCTURE_ONLY:
1764     /* The option is handled directly by MatSetOption() */
1765     break;
1766   default:
1767     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1773 {
1774   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1775   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1776   PetscErrorCode ierr;
1777   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1778   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1779   PetscInt       *cmap,*idx_p;
1780 
1781   PetscFunctionBegin;
1782   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1783   mat->getrowactive = PETSC_TRUE;
1784 
1785   if (!mat->rowvalues && (idx || v)) {
1786     /*
1787         allocate enough space to hold information from the longest row.
1788     */
1789     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1790     PetscInt   max = 1,tmp;
1791     for (i=0; i<matin->rmap->n; i++) {
1792       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1793       if (max < tmp) max = tmp;
1794     }
1795     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1796   }
1797 
1798   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1799   lrow = row - rstart;
1800 
1801   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1802   if (!v)   {pvA = 0; pvB = 0;}
1803   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1804   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1805   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1806   nztot = nzA + nzB;
1807 
1808   cmap = mat->garray;
1809   if (v  || idx) {
1810     if (nztot) {
1811       /* Sort by increasing column numbers, assuming A and B already sorted */
1812       PetscInt imark = -1;
1813       if (v) {
1814         *v = v_p = mat->rowvalues;
1815         for (i=0; i<nzB; i++) {
1816           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1817           else break;
1818         }
1819         imark = i;
1820         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1821         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1822       }
1823       if (idx) {
1824         *idx = idx_p = mat->rowindices;
1825         if (imark > -1) {
1826           for (i=0; i<imark; i++) {
1827             idx_p[i] = cmap[cworkB[i]];
1828           }
1829         } else {
1830           for (i=0; i<nzB; i++) {
1831             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1832             else break;
1833           }
1834           imark = i;
1835         }
1836         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1837         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1838       }
1839     } else {
1840       if (idx) *idx = 0;
1841       if (v)   *v   = 0;
1842     }
1843   }
1844   *nz  = nztot;
1845   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1846   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1851 {
1852   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1853 
1854   PetscFunctionBegin;
1855   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1856   aij->getrowactive = PETSC_FALSE;
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1861 {
1862   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1863   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1864   PetscErrorCode ierr;
1865   PetscInt       i,j,cstart = mat->cmap->rstart;
1866   PetscReal      sum = 0.0;
1867   MatScalar      *v;
1868 
1869   PetscFunctionBegin;
1870   if (aij->size == 1) {
1871     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1872   } else {
1873     if (type == NORM_FROBENIUS) {
1874       v = amat->a;
1875       for (i=0; i<amat->nz; i++) {
1876         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877       }
1878       v = bmat->a;
1879       for (i=0; i<bmat->nz; i++) {
1880         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1881       }
1882       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1883       *norm = PetscSqrtReal(*norm);
1884       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1885     } else if (type == NORM_1) { /* max column norm */
1886       PetscReal *tmp,*tmp2;
1887       PetscInt  *jj,*garray = aij->garray;
1888       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1889       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1890       *norm = 0.0;
1891       v     = amat->a; jj = amat->j;
1892       for (j=0; j<amat->nz; j++) {
1893         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1894       }
1895       v = bmat->a; jj = bmat->j;
1896       for (j=0; j<bmat->nz; j++) {
1897         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1898       }
1899       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1900       for (j=0; j<mat->cmap->N; j++) {
1901         if (tmp2[j] > *norm) *norm = tmp2[j];
1902       }
1903       ierr = PetscFree(tmp);CHKERRQ(ierr);
1904       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1905       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1906     } else if (type == NORM_INFINITY) { /* max row norm */
1907       PetscReal ntemp = 0.0;
1908       for (j=0; j<aij->A->rmap->n; j++) {
1909         v   = amat->a + amat->i[j];
1910         sum = 0.0;
1911         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1912           sum += PetscAbsScalar(*v); v++;
1913         }
1914         v = bmat->a + bmat->i[j];
1915         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1916           sum += PetscAbsScalar(*v); v++;
1917         }
1918         if (sum > ntemp) ntemp = sum;
1919       }
1920       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1921       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1922     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1928 {
1929   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1930   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1931   PetscErrorCode ierr;
1932   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1933   PetscInt       cstart = A->cmap->rstart,ncol;
1934   Mat            B;
1935   MatScalar      *array;
1936 
1937   PetscFunctionBegin;
1938   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1939   ai = Aloc->i; aj = Aloc->j;
1940   bi = Bloc->i; bj = Bloc->j;
1941   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1942     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1943     PetscSFNode          *oloc;
1944     PETSC_UNUSED PetscSF sf;
1945 
1946     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1947     /* compute d_nnz for preallocation */
1948     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1949     for (i=0; i<ai[ma]; i++) {
1950       d_nnz[aj[i]]++;
1951       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1952     }
1953     /* compute local off-diagonal contributions */
1954     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1955     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1956     /* map those to global */
1957     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1958     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1959     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1960     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1961     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1962     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1963     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1964 
1965     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1966     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1967     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1968     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1969     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1970     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1971   } else {
1972     B    = *matout;
1973     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1974     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1975   }
1976 
1977   /* copy over the A part */
1978   array = Aloc->a;
1979   row   = A->rmap->rstart;
1980   for (i=0; i<ma; i++) {
1981     ncol = ai[i+1]-ai[i];
1982     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1983     row++;
1984     array += ncol; aj += ncol;
1985   }
1986   aj = Aloc->j;
1987   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1988 
1989   /* copy over the B part */
1990   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1991   array = Bloc->a;
1992   row   = A->rmap->rstart;
1993   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1994   cols_tmp = cols;
1995   for (i=0; i<mb; i++) {
1996     ncol = bi[i+1]-bi[i];
1997     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1998     row++;
1999     array += ncol; cols_tmp += ncol;
2000   }
2001   ierr = PetscFree(cols);CHKERRQ(ierr);
2002 
2003   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2004   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2006     *matout = B;
2007   } else {
2008     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2014 {
2015   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2016   Mat            a    = aij->A,b = aij->B;
2017   PetscErrorCode ierr;
2018   PetscInt       s1,s2,s3;
2019 
2020   PetscFunctionBegin;
2021   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2022   if (rr) {
2023     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2024     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2025     /* Overlap communication with computation. */
2026     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2027   }
2028   if (ll) {
2029     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2030     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2031     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2032   }
2033   /* scale the diagonal block */
2034   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2035 
2036   if (rr) {
2037     /* Do a scatter end and then right scale the off-diagonal block */
2038     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2039     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2045 {
2046   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2047   PetscErrorCode ierr;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2055 {
2056   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2057   Mat            a,b,c,d;
2058   PetscBool      flg;
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   a = matA->A; b = matA->B;
2063   c = matB->A; d = matB->B;
2064 
2065   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2066   if (flg) {
2067     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2068   }
2069   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2074 {
2075   PetscErrorCode ierr;
2076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2077   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2078 
2079   PetscFunctionBegin;
2080   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2081   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2082     /* because of the column compression in the off-processor part of the matrix a->B,
2083        the number of columns in a->B and b->B may be different, hence we cannot call
2084        the MatCopy() directly on the two parts. If need be, we can provide a more
2085        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2086        then copying the submatrices */
2087     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2088   } else {
2089     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2090     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2091   }
2092   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2097 {
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /*
2106    Computes the number of nonzeros per row needed for preallocation when X and Y have different
2107    nonzero structure: nnz[i] is the size of the union of the (sorted) column index sets of row i
2108    of X and Y, with columns compared in global numbering through the maps xltog and yltog. */
2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2110 {
2111   PetscInt       i,j,k,nzx,nzy;
2112 
2113   PetscFunctionBegin;
2114   /* Set the number of nonzeros in the new matrix */
2115   for (i=0; i<m; i++) {
2116     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2117     nzx = xi[i+1] - xi[i];
2118     nzy = yi[i+1] - yi[i];
2119     nnz[i] = 0;
2120     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2121       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2122       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2123       nnz[i]++;
2124     }
2125     for (; k<nzy; k++) nnz[i]++;
2126   }
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2131 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2132 {
2133   PetscErrorCode ierr;
2134   PetscInt       m = Y->rmap->N;
2135   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2136   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2144 {
2145   PetscErrorCode ierr;
2146   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2147   PetscBLASInt   bnz,one=1;
2148   Mat_SeqAIJ     *x,*y;
2149 
2150   PetscFunctionBegin;
2151   if (str == SAME_NONZERO_PATTERN) {
2152     PetscScalar alpha = a;
2153     x    = (Mat_SeqAIJ*)xx->A->data;
2154     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2155     y    = (Mat_SeqAIJ*)yy->A->data;
2156     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2157     x    = (Mat_SeqAIJ*)xx->B->data;
2158     y    = (Mat_SeqAIJ*)yy->B->data;
2159     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2160     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2161     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2162   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2163     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2164   } else {
2165     Mat      B;
2166     PetscInt *nnz_d,*nnz_o;
2167     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2168     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2169     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2170     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2171     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2172     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2173     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2174     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2175     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2176     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2177     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2178     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2179     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2180     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2181   }
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2186 
2187 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2188 {
2189 #if defined(PETSC_USE_COMPLEX)
2190   PetscErrorCode ierr;
2191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2192 
2193   PetscFunctionBegin;
2194   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2195   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2196 #else
2197   PetscFunctionBegin;
2198 #endif
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2209   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2214 {
2215   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2216   PetscErrorCode ierr;
2217 
2218   PetscFunctionBegin;
2219   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2220   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2225 {
2226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2227   PetscErrorCode ierr;
2228   PetscInt       i,*idxb = 0;
2229   PetscScalar    *va,*vb;
2230   Vec            vtmp;
2231 
2232   PetscFunctionBegin;
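  /* The row-wise maximum in absolute value is the larger of the diagonal-block and off-diagonal-block
     results; diagonal-block indices are shifted by cmap->rstart and off-diagonal-block indices are
     mapped to global columns through a->garray. */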
2233   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2234   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2235   if (idx) {
2236     for (i=0; i<A->rmap->n; i++) {
2237       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2238     }
2239   }
2240 
2241   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2242   if (idx) {
2243     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2244   }
2245   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2246   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2247 
2248   for (i=0; i<A->rmap->n; i++) {
2249     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2250       va[i] = vb[i];
2251       if (idx) idx[i] = a->garray[idxb[i]];
2252     }
2253   }
2254 
2255   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2256   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2257   ierr = PetscFree(idxb);CHKERRQ(ierr);
2258   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2263 {
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2265   PetscErrorCode ierr;
2266   PetscInt       i,*idxb = 0;
2267   PetscScalar    *va,*vb;
2268   Vec            vtmp;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2272   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2273   if (idx) {
2274     for (i=0; i<A->rmap->n; i++) {
2275       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2276     }
2277   }
2278 
2279   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2280   if (idx) {
2281     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2282   }
2283   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2284   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2285 
2286   for (i=0; i<A->rmap->n; i++) {
2287     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2288       va[i] = vb[i];
2289       if (idx) idx[i] = a->garray[idxb[i]];
2290     }
2291   }
2292 
2293   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2294   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2295   ierr = PetscFree(idxb);CHKERRQ(ierr);
2296   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2301 {
2302   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2303   PetscInt       n      = A->rmap->n;
2304   PetscInt       cstart = A->cmap->rstart;
2305   PetscInt       *cmap  = mat->garray;
2306   PetscInt       *diagIdx, *offdiagIdx;
2307   Vec            diagV, offdiagV;
2308   PetscScalar    *a, *diagA, *offdiagA;
2309   PetscInt       r;
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2314   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2315   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2316   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2317   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2318   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2319   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2320   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2321   for (r = 0; r < n; ++r) {
2322     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2323       a[r]   = diagA[r];
2324       idx[r] = cstart + diagIdx[r];
2325     } else {
2326       a[r]   = offdiagA[r];
2327       idx[r] = cmap[offdiagIdx[r]];
2328     }
2329   }
2330   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2332   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2333   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2334   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2335   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2340 {
2341   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2342   PetscInt       n      = A->rmap->n;
2343   PetscInt       cstart = A->cmap->rstart;
2344   PetscInt       *cmap  = mat->garray;
2345   PetscInt       *diagIdx, *offdiagIdx;
2346   Vec            diagV, offdiagV;
2347   PetscScalar    *a, *diagA, *offdiagA;
2348   PetscInt       r;
2349   PetscErrorCode ierr;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2353   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2354   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2355   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2356   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2357   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2358   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2359   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2360   for (r = 0; r < n; ++r) {
2361     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2362       a[r]   = diagA[r];
2363       idx[r] = cstart + diagIdx[r];
2364     } else {
2365       a[r]   = offdiagA[r];
2366       idx[r] = cmap[offdiagIdx[r]];
2367     }
2368   }
2369   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2370   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2371   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2372   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2373   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2374   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2379 {
2380   PetscErrorCode ierr;
2381   Mat            *dummy;
2382 
2383   PetscFunctionBegin;
2384   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2385   *newmat = *dummy;
2386   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2393   PetscErrorCode ierr;
2394 
2395   PetscFunctionBegin;
2396   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2397   A->factorerrortype = a->A->factorerrortype;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2402 {
2403   PetscErrorCode ierr;
2404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2405 
2406   PetscFunctionBegin;
2407   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2408   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2409   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2415 {
2416   PetscFunctionBegin;
2417   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2418   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 /*@
2423    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2424 
2425    Collective on Mat
2426 
2427    Input Parameters:
2428 +    A - the matrix
2429 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2430 
2431  Level: advanced
2432 
2433 @*/
2434 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2435 {
2436   PetscErrorCode       ierr;
2437 
2438   PetscFunctionBegin;
2439   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
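/* Illustrative usage sketch for the routine above (not part of this file): enable the scalable overlap
   algorithm either from code,
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   or from the options database with -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ below). */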
2442 
2443 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2444 {
2445   PetscErrorCode       ierr;
2446   PetscBool            sc = PETSC_FALSE,flg;
2447 
2448   PetscFunctionBegin;
2449   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2451   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2452   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2453   if (flg) {
2454     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2455   }
2456   ierr = PetscOptionsTail();CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2461 {
2462   PetscErrorCode ierr;
2463   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2464   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2465 
2466   PetscFunctionBegin;
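  /* MatShift_Basic() adds a to the diagonal with MatSetValues(); if Y was never preallocated, or its
     diagonal block is still empty, one slot per row is reserved first so the diagonal entries can be
     inserted without triggering new allocations. */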
2467   if (!Y->preallocated) {
2468     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2469   } else if (!aij->nz) {
2470     PetscInt nonew = aij->nonew;
2471     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2472     aij->nonew = nonew;
2473   }
2474   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2479 {
2480   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2481   PetscErrorCode ierr;
2482 
2483   PetscFunctionBegin;
2484   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2485   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2486   if (d) {
2487     PetscInt rstart;
2488     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2489     *d += rstart;
2490 
2491   }
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 
2496 /* -------------------------------------------------------------------*/
2497 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2498                                        MatGetRow_MPIAIJ,
2499                                        MatRestoreRow_MPIAIJ,
2500                                        MatMult_MPIAIJ,
2501                                 /* 4*/ MatMultAdd_MPIAIJ,
2502                                        MatMultTranspose_MPIAIJ,
2503                                        MatMultTransposeAdd_MPIAIJ,
2504                                        0,
2505                                        0,
2506                                        0,
2507                                 /*10*/ 0,
2508                                        0,
2509                                        0,
2510                                        MatSOR_MPIAIJ,
2511                                        MatTranspose_MPIAIJ,
2512                                 /*15*/ MatGetInfo_MPIAIJ,
2513                                        MatEqual_MPIAIJ,
2514                                        MatGetDiagonal_MPIAIJ,
2515                                        MatDiagonalScale_MPIAIJ,
2516                                        MatNorm_MPIAIJ,
2517                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2518                                        MatAssemblyEnd_MPIAIJ,
2519                                        MatSetOption_MPIAIJ,
2520                                        MatZeroEntries_MPIAIJ,
2521                                 /*24*/ MatZeroRows_MPIAIJ,
2522                                        0,
2523                                        0,
2524                                        0,
2525                                        0,
2526                                 /*29*/ MatSetUp_MPIAIJ,
2527                                        0,
2528                                        0,
2529                                        MatGetDiagonalBlock_MPIAIJ,
2530                                        0,
2531                                 /*34*/ MatDuplicate_MPIAIJ,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                        0,
2536                                 /*39*/ MatAXPY_MPIAIJ,
2537                                        MatCreateSubMatrices_MPIAIJ,
2538                                        MatIncreaseOverlap_MPIAIJ,
2539                                        MatGetValues_MPIAIJ,
2540                                        MatCopy_MPIAIJ,
2541                                 /*44*/ MatGetRowMax_MPIAIJ,
2542                                        MatScale_MPIAIJ,
2543                                        MatShift_MPIAIJ,
2544                                        MatDiagonalSet_MPIAIJ,
2545                                        MatZeroRowsColumns_MPIAIJ,
2546                                 /*49*/ MatSetRandom_MPIAIJ,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2552                                        0,
2553                                        MatSetUnfactored_MPIAIJ,
2554                                        MatPermute_MPIAIJ,
2555                                        0,
2556                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2557                                        MatDestroy_MPIAIJ,
2558                                        MatView_MPIAIJ,
2559                                        0,
2560                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2561                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2562                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2563                                        0,
2564                                        0,
2565                                        0,
2566                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2567                                        MatGetRowMinAbs_MPIAIJ,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                 /*75*/ MatFDColoringApply_AIJ,
2573                                        MatSetFromOptions_MPIAIJ,
2574                                        0,
2575                                        0,
2576                                        MatFindZeroDiagonals_MPIAIJ,
2577                                 /*80*/ 0,
2578                                        0,
2579                                        0,
2580                                 /*83*/ MatLoad_MPIAIJ,
2581                                        MatIsSymmetric_MPIAIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2587                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2588                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2589                                        MatPtAP_MPIAIJ_MPIAIJ,
2590                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2591                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                        0,
2596                                 /*99*/ 0,
2597                                        0,
2598                                        0,
2599                                        MatConjugate_MPIAIJ,
2600                                        0,
2601                                 /*104*/MatSetValuesRow_MPIAIJ,
2602                                        MatRealPart_MPIAIJ,
2603                                        MatImaginaryPart_MPIAIJ,
2604                                        0,
2605                                        0,
2606                                 /*109*/0,
2607                                        0,
2608                                        MatGetRowMin_MPIAIJ,
2609                                        0,
2610                                        MatMissingDiagonal_MPIAIJ,
2611                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2612                                        0,
2613                                        MatGetGhosts_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                 /*119*/0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        MatGetMultiProcBlock_MPIAIJ,
2621                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2622                                        MatGetColumnNorms_MPIAIJ,
2623                                        MatInvertBlockDiagonal_MPIAIJ,
2624                                        0,
2625                                        MatCreateSubMatricesMPI_MPIAIJ,
2626                                 /*129*/0,
2627                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2628                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2629                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2630                                        0,
2631                                 /*134*/0,
2632                                        0,
2633                                        MatRARt_MPIAIJ_MPIAIJ,
2634                                        0,
2635                                        0,
2636                                 /*139*/MatSetBlockSizes_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        MatFDColoringSetUp_MPIXAIJ,
2640                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2641                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2642 };
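/*
   Editor's sketch (not part of the PETSc source): the table above is how the generic Mat
   interface dispatches to the MPIAIJ implementation.  A public call such as MatMult()
   reduces, roughly, to an indirect call through this table,

       ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);

   so a 0 entry in a slot means MATMPIAIJ does not provide that operation.
*/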
2643 
2644 /* ----------------------------------------------------------------------------------------*/
2645 
2646 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2647 {
2648   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2653   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2658 {
2659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2664   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2669 {
2670   Mat_MPIAIJ     *b;
2671   PetscErrorCode ierr;
2672 
2673   PetscFunctionBegin;
2674   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2675   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2676   b = (Mat_MPIAIJ*)B->data;
2677 
2678 #if defined(PETSC_USE_CTABLE)
2679   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2680 #else
2681   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2682 #endif
2683   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2684   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2685   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2686 
2687   /* Because B may have been resized, we simply destroy it and create a new one each time */
2688   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2689   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2690   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2691   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2692   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2693   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2694 
2695   if (!B->preallocated) {
2696     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2697     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2698     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2699     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2700     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2701   }
2702 
2703   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2704   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2705   B->preallocated  = PETSC_TRUE;
2706   B->was_assembled = PETSC_FALSE;
2707   B->assembled     = PETSC_FALSE;
2708   PetscFunctionReturn(0);
2709 }
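/*
   Editor's sketch (illustrative only, sizes are made up): user code reaches
   MatMPIAIJSetPreallocation_MPIAIJ() above through the public MatMPIAIJSetPreallocation(),
   for example

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   which preallocates 5 nonzeros per row in the diagonal block and 2 per row in the
   off-diagonal block; pass d_nnz/o_nnz arrays instead of NULL for exact per-row counts.
*/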
2710 
2711 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2712 {
2713   Mat_MPIAIJ     *b;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2718   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2719   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2720   b = (Mat_MPIAIJ*)B->data;
2721 
2722 #if defined(PETSC_USE_CTABLE)
2723   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2724 #else
2725   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2726 #endif
2727   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2728   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2729   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2730 
2731   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2732   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2733   B->preallocated  = PETSC_TRUE;
2734   B->was_assembled = PETSC_FALSE;
2735   B->assembled = PETSC_FALSE;
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2740 {
2741   Mat            mat;
2742   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   *newmat = 0;
2747   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2748   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2749   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2750   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2751   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2752   a       = (Mat_MPIAIJ*)mat->data;
2753 
2754   mat->factortype   = matin->factortype;
2755   mat->assembled    = PETSC_TRUE;
2756   mat->insertmode   = NOT_SET_VALUES;
2757   mat->preallocated = PETSC_TRUE;
2758 
2759   a->size         = oldmat->size;
2760   a->rank         = oldmat->rank;
2761   a->donotstash   = oldmat->donotstash;
2762   a->roworiented  = oldmat->roworiented;
2763   a->rowindices   = 0;
2764   a->rowvalues    = 0;
2765   a->getrowactive = PETSC_FALSE;
2766 
2767   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2768   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2769 
2770   if (oldmat->colmap) {
2771 #if defined(PETSC_USE_CTABLE)
2772     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2773 #else
2774     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2775     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2776     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2777 #endif
2778   } else a->colmap = 0;
2779   if (oldmat->garray) {
2780     PetscInt len;
2781     len  = oldmat->B->cmap->n;
2782     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2783     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2784     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2785   } else a->garray = 0;
2786 
2787   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2788   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2789   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2790   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2791 
2792   if (oldmat->Mvctx_mpi1) {
2793     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2794     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2795   }
2796 
2797   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2798   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2799   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2800   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2801   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2802   *newmat = mat;
2803   PetscFunctionReturn(0);
2804 }
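/*
   Editor's sketch (usage illustration, not part of the source): the routine above is reached
   through the generic interface,

       Mat B;
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   with MAT_DO_NOT_COPY_VALUES duplicating only the nonzero structure; both sequential blocks,
   the column map, and the communication scatters are copied as shown above.
*/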
2805 
2806 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2807 {
2808   PetscScalar    *vals,*svals;
2809   MPI_Comm       comm;
2810   PetscErrorCode ierr;
2811   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2812   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2813   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2814   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2815   PetscInt       cend,cstart,n,*rowners;
2816   int            fd;
2817   PetscInt       bs = newMat->rmap->bs;
2818 
2819   PetscFunctionBegin;
2820   /* force binary viewer to load .info file if it has not yet done so */
2821   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2822   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2823   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2824   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2825   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2826   if (!rank) {
2827     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2828     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2829     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2830   }
2831 
2832   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2833   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2834   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2835   if (bs < 0) bs = 1;
2836 
2837   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2838   M    = header[1]; N = header[2];
2839 
2840   /* If global sizes are set, check if they are consistent with that given in the file */
2841   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2842   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2843 
2844   /* determine ownership of all (block) rows */
2845   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2846   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2847   else m = newMat->rmap->n; /* Set by user */
2848 
2849   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2850   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2851 
2852   /* First process needs enough room for process with most rows */
2853   if (!rank) {
2854     mmax = rowners[1];
2855     for (i=2; i<=size; i++) {
2856       mmax = PetscMax(mmax, rowners[i]);
2857     }
2858   } else mmax = -1;             /* unused, but compilers complain */
2859 
2860   rowners[0] = 0;
2861   for (i=2; i<=size; i++) {
2862     rowners[i] += rowners[i-1];
2863   }
2864   rstart = rowners[rank];
2865   rend   = rowners[rank+1];
2866 
2867   /* distribute row lengths to all processors */
2868   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2869   if (!rank) {
2870     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2871     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2872     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2873     for (j=0; j<m; j++) {
2874       procsnz[0] += ourlens[j];
2875     }
2876     for (i=1; i<size; i++) {
2877       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2878       /* calculate the number of nonzeros on each processor */
2879       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2880         procsnz[i] += rowlengths[j];
2881       }
2882       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2883     }
2884     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2885   } else {
2886     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2887   }
2888 
2889   if (!rank) {
2890     /* determine max buffer needed and allocate it */
2891     maxnz = 0;
2892     for (i=0; i<size; i++) {
2893       maxnz = PetscMax(maxnz,procsnz[i]);
2894     }
2895     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2896 
2897     /* read in my part of the matrix column indices  */
2898     nz   = procsnz[0];
2899     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2900     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2901 
2902     /* read in everyone else's and ship them off */
2903     for (i=1; i<size; i++) {
2904       nz   = procsnz[i];
2905       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2906       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2907     }
2908     ierr = PetscFree(cols);CHKERRQ(ierr);
2909   } else {
2910     /* determine buffer space needed for message */
2911     nz = 0;
2912     for (i=0; i<m; i++) {
2913       nz += ourlens[i];
2914     }
2915     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2916 
2917     /* receive message of column indices */
2918     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2919   }
2920 
2921   /* determine column ownership if matrix is not square */
2922   if (N != M) {
2923     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2924     else n = newMat->cmap->n;
2925     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2926     cstart = cend - n;
2927   } else {
2928     cstart = rstart;
2929     cend   = rend;
2930     n      = cend - cstart;
2931   }
2932 
2933   /* loop over local rows, determining number of off diagonal entries */
2934   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2935   jj   = 0;
2936   for (i=0; i<m; i++) {
2937     for (j=0; j<ourlens[i]; j++) {
2938       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2939       jj++;
2940     }
2941   }
2942 
2943   for (i=0; i<m; i++) {
2944     ourlens[i] -= offlens[i];
2945   }
2946   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2947 
2948   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2949 
2950   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2951 
2952   for (i=0; i<m; i++) {
2953     ourlens[i] += offlens[i];
2954   }
2955 
2956   if (!rank) {
2957     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2958 
2959     /* read in my part of the matrix numerical values  */
2960     nz   = procsnz[0];
2961     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2962 
2963     /* insert into matrix */
2964     jj      = rstart;
2965     smycols = mycols;
2966     svals   = vals;
2967     for (i=0; i<m; i++) {
2968       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2969       smycols += ourlens[i];
2970       svals   += ourlens[i];
2971       jj++;
2972     }
2973 
2974     /* read in the other processes' parts and ship them out */
2975     for (i=1; i<size; i++) {
2976       nz   = procsnz[i];
2977       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2978       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2979     }
2980     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2981   } else {
2982     /* receive numeric values */
2983     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2984 
2985     /* receive message of values*/
2986     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2987 
2988     /* insert into matrix */
2989     jj      = rstart;
2990     smycols = mycols;
2991     svals   = vals;
2992     for (i=0; i<m; i++) {
2993       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2994       smycols += ourlens[i];
2995       svals   += ourlens[i];
2996       jj++;
2997     }
2998   }
2999   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3000   ierr = PetscFree(vals);CHKERRQ(ierr);
3001   ierr = PetscFree(mycols);CHKERRQ(ierr);
3002   ierr = PetscFree(rowners);CHKERRQ(ierr);
3003   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   PetscFunctionReturn(0);
3006 }
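/*
   Editor's sketch (usage illustration with a made-up file name): MatLoad_MPIAIJ() is invoked
   through MatLoad() with a binary viewer, e.g.

       Mat         A;
       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Rank 0 reads the header, row lengths, column indices, and values from the file and ships
   each process its slice, as implemented above.
*/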
3007 
3008 /* Not scalable because of ISAllGather() unless getting all columns. */
3009 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3010 {
3011   PetscErrorCode ierr;
3012   IS             iscol_local;
3013   PetscBool      isstride;
3014   PetscMPIInt    lisstride=0,gisstride;
3015 
3016   PetscFunctionBegin;
3017   /* check if we are grabbing all columns */
3018   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3019 
3020   if (isstride) {
3021     PetscInt  start,len,mstart,mlen;
3022     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3023     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3024     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3025     if (mstart == start && mlen-mstart == len) lisstride = 1;
3026   }
3027 
3028   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3029   if (gisstride) {
3030     PetscInt N;
3031     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3032     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3033     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3034     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3035   } else {
3036     PetscInt cbs;
3037     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3038     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3039     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3040   }
3041 
3042   *isseq = iscol_local;
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 /*
3047  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3048  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3049 
3050  Input Parameters:
3051    mat - matrix
3052    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3053            i.e., mat->rstart <= isrow[i] < mat->rend
3054    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3055            i.e., mat->cstart <= iscol[i] < mat->cend
3056  Output Parameters:
3057    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3058    iscol_o - sequential column index set for retrieving mat->B
3059    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3060  */
3061 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3062 {
3063   PetscErrorCode ierr;
3064   Vec            x,cmap;
3065   const PetscInt *is_idx;
3066   PetscScalar    *xarray,*cmaparray;
3067   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3068   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3069   Mat            B=a->B;
3070   Vec            lvec=a->lvec,lcmap;
3071   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3072   MPI_Comm       comm;
3073   VecScatter     Mvctx=a->Mvctx;
3074 
3075   PetscFunctionBegin;
3076   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3077   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3078 
3079   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3080   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3081   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3082   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3083   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3084 
3085   /* Get start indices */
3086   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3087   isstart -= ncols;
3088   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3089 
3090   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3091   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3092   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3093   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3094   for (i=0; i<ncols; i++) {
3095     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3096     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3097     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3098   }
3099   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3100   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3101   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3102 
3103   /* Get iscol_d */
3104   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3105   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3106   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3107 
3108   /* Get isrow_d */
3109   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3110   rstart = mat->rmap->rstart;
3111   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3112   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3113   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3114   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3115 
3116   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3117   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3118   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3119 
3120   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3121   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3122   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3123 
3124   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3125 
3126   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3127   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128 
3129   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3130   /* off-process column indices */
3131   count = 0;
3132   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3133   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3134 
3135   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3136   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3137   for (i=0; i<Bn; i++) {
3138     if (PetscRealPart(xarray[i]) > -1.0) {
3139       idx[count]     = i;                   /* local column index in off-diagonal part B */
3140       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3141       count++;
3142     }
3143   }
3144   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3145   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3146 
3147   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3148   /* cannot ensure iscol_o has same blocksize as iscol! */
3149 
3150   ierr = PetscFree(idx);CHKERRQ(ierr);
3151   *garray = cmap1;
3152 
3153   ierr = VecDestroy(&x);CHKERRQ(ierr);
3154   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3155   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3160 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3161 {
3162   PetscErrorCode ierr;
3163   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3164   Mat            M = NULL;
3165   MPI_Comm       comm;
3166   IS             iscol_d,isrow_d,iscol_o;
3167   Mat            Asub = NULL,Bsub = NULL;
3168   PetscInt       n;
3169 
3170   PetscFunctionBegin;
3171   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3172 
3173   if (call == MAT_REUSE_MATRIX) {
3174     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3175     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3176     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3177 
3178     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3179     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3180 
3181     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3182     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3183 
3184     /* Update diagonal and off-diagonal portions of submat */
3185     asub = (Mat_MPIAIJ*)(*submat)->data;
3186     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3187     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3188     if (n) {
3189       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3190     }
3191     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3192     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3193 
3194   } else { /* call == MAT_INITIAL_MATRIX */
3195     const PetscInt *garray;
3196     PetscInt        BsubN;
3197 
3198     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3199     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3200 
3201     /* Create local submatrices Asub and Bsub */
3202     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3203     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3204 
3205     /* Create submatrix M */
3206     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3207 
3208     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3209     asub = (Mat_MPIAIJ*)M->data;
3210 
3211     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3212     n = asub->B->cmap->N;
3213     if (BsubN > n) {
3214       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3215       const PetscInt *idx;
3216       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3217       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3218 
3219       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3220       j = 0;
3221       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3222       for (i=0; i<n; i++) {
3223         if (j >= BsubN) break;
3224         while (subgarray[i] > garray[j]) j++;
3225 
3226         if (subgarray[i] == garray[j]) {
3227           idx_new[i] = idx[j++];
3228         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3229       }
3230       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3231 
3232       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3233       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3234 
3235     } else if (BsubN < n) {
3236       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3237     }
3238 
3239     ierr = PetscFree(garray);CHKERRQ(ierr);
3240     *submat = M;
3241 
3242     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3243     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3244     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3245 
3246     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3247     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3248 
3249     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3250     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3251   }
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3256 {
3257   PetscErrorCode ierr;
3258   IS             iscol_local=NULL,isrow_d;
3259   PetscInt       csize;
3260   PetscInt       n,i,j,start,end;
3261   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3262   MPI_Comm       comm;
3263 
3264   PetscFunctionBegin;
3265   /* If isrow has same processor distribution as mat,
3266      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3267   if (call == MAT_REUSE_MATRIX) {
3268     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3269     if (isrow_d) {
3270       sameRowDist  = PETSC_TRUE;
3271       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3272     } else {
3273       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3274       if (iscol_local) {
3275         sameRowDist  = PETSC_TRUE;
3276         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3277       }
3278     }
3279   } else {
3280     /* Check if isrow has same processor distribution as mat */
3281     sameDist[0] = PETSC_FALSE;
3282     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3283     if (!n) {
3284       sameDist[0] = PETSC_TRUE;
3285     } else {
3286       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3287       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3288       if (i >= start && j < end) {
3289         sameDist[0] = PETSC_TRUE;
3290       }
3291     }
3292 
3293     /* Check if iscol has same processor distribution as mat */
3294     sameDist[1] = PETSC_FALSE;
3295     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3296     if (!n) {
3297       sameDist[1] = PETSC_TRUE;
3298     } else {
3299       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3300       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3301       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3302     }
3303 
3304     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3305     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3306     sameRowDist = tsameDist[0];
3307   }
3308 
3309   if (sameRowDist) {
3310     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3311       /* isrow and iscol have same processor distribution as mat */
3312       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3313       PetscFunctionReturn(0);
3314     } else { /* sameRowDist */
3315       /* isrow has same processor distribution as mat */
3316       if (call == MAT_INITIAL_MATRIX) {
3317         PetscBool sorted;
3318         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3319         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3320         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3321         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3322 
3323         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3324         if (sorted) {
3325           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3326           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3327           PetscFunctionReturn(0);
3328         }
3329       } else { /* call == MAT_REUSE_MATRIX */
3330         IS    iscol_sub;
3331         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3332         if (iscol_sub) {
3333           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3334           PetscFunctionReturn(0);
3335         }
3336       }
3337     }
3338   }
3339 
3340   /* General case: iscol -> iscol_local which has global size of iscol */
3341   if (call == MAT_REUSE_MATRIX) {
3342     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3343     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3344   } else {
3345     if (!iscol_local) {
3346       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3347     }
3348   }
3349 
3350   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3351   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3352 
3353   if (call == MAT_INITIAL_MATRIX) {
3354     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3355     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3356   }
3357   PetscFunctionReturn(0);
3358 }
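/*
   Editor's sketch (usage illustration with locally owned rows/columns; not part of the source):
   MatCreateSubMatrix_MPIAIJ() sits behind the public MatCreateSubMatrix(), e.g.

       IS       isrow,iscol;
       Mat      S;
       PetscInt rstart,rend,cstart,cend;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
       ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);

   Because these index sets match the matrix layout, the routine above takes the
   SameRowColDist fast path; otherwise it falls back to SameRowDist or the nonscalable
   general case.
*/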
3359 
3360 /*@C
3361      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3362          and "off-diagonal" part of the matrix in CSR format.
3363 
3364    Collective on MPI_Comm
3365 
3366    Input Parameters:
3367 +  comm - MPI communicator
3368 .  A - "diagonal" portion of matrix
3369 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3370 -  garray - global index of B columns
3371 
3372    Output Parameter:
3373 .   mat - the matrix, with input A as its local diagonal matrix
3374    Level: advanced
3375 
3376    Notes:
3377        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3378        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3379 
3380 .seealso: MatCreateMPIAIJWithSplitArrays()
3381 @*/
3382 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3383 {
3384   PetscErrorCode ierr;
3385   Mat_MPIAIJ     *maij;
3386   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3387   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3388   PetscScalar    *oa=b->a;
3389   Mat            Bnew;
3390   PetscInt       m,n,N;
3391 
3392   PetscFunctionBegin;
3393   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3394   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3395   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3396   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3397   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3398   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3399 
3400   /* Get global columns of mat */
3401   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3402 
3403   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3404   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3405   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3406   maij = (Mat_MPIAIJ*)(*mat)->data;
3407 
3408   (*mat)->preallocated = PETSC_TRUE;
3409 
3410   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3411   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3412 
3413   /* Set A as diagonal portion of *mat */
3414   maij->A = A;
3415 
3416   nz = oi[m];
3417   for (i=0; i<nz; i++) {
3418     col   = oj[i];
3419     oj[i] = garray[col];
3420   }
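  /*
     Editor's note (numbers are made up for illustration): garray[] maps B's compact local
     column indices back to global column indices.  If this process's B has three local columns
     with garray = {3,25,47}, a local index oj[i] = 1 becomes the global index 25 here, so Bnew
     below can be created with the full global column dimension N and then condensed again
     during assembly.
  */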
3421 
3422    /* Set Bnew as off-diagonal portion of *mat */
3423   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3424   bnew        = (Mat_SeqAIJ*)Bnew->data;
3425   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3426   maij->B     = Bnew;
3427 
3428   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3429 
3430   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3431   b->free_a       = PETSC_FALSE;
3432   b->free_ij      = PETSC_FALSE;
3433   ierr = MatDestroy(&B);CHKERRQ(ierr);
3434 
3435   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3436   bnew->free_a       = PETSC_TRUE;
3437   bnew->free_ij      = PETSC_TRUE;
3438 
3439   /* condense columns of maij->B */
3440   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3441   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3442   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3443   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3444   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3449 
3450 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3451 {
3452   PetscErrorCode ierr;
3453   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3454   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3455   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3456   Mat            M,Msub,B=a->B;
3457   MatScalar      *aa;
3458   Mat_SeqAIJ     *aij;
3459   PetscInt       *garray = a->garray,*colsub,Ncols;
3460   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3461   IS             iscol_sub,iscmap;
3462   const PetscInt *is_idx,*cmap;
3463   PetscBool      allcolumns=PETSC_FALSE;
3464   MPI_Comm       comm;
3465 
3466   PetscFunctionBegin;
3467   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3468 
3469   if (call == MAT_REUSE_MATRIX) {
3470     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3471     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3472     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3473 
3474     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3475     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3476 
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3478     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3479 
3480     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3481 
3482   } else { /* call == MAT_INITIAL_MATRIX */
3483     PetscBool flg;
3484 
3485     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3486     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3487 
3488     /* (1) iscol -> nonscalable iscol_local */
3489     /* Check for special case: each processor gets entire matrix columns */
3490     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3491     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3492     if (allcolumns) {
3493       iscol_sub = iscol_local;
3494       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3495       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3496 
3497     } else {
3498       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3499       PetscInt *idx,*cmap1,k;
3500       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3501       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3502       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3503       count = 0;
3504       k     = 0;
3505       for (i=0; i<Ncols; i++) {
3506         j = is_idx[i];
3507         if (j >= cstart && j < cend) {
3508           /* diagonal part of mat */
3509           idx[count]     = j;
3510           cmap1[count++] = i; /* column index in submat */
3511         } else if (Bn) {
3512           /* off-diagonal part of mat */
3513           if (j == garray[k]) {
3514             idx[count]     = j;
3515             cmap1[count++] = i;  /* column index in submat */
3516           } else if (j > garray[k]) {
3517             while (j > garray[k] && k < Bn-1) k++;
3518             if (j == garray[k]) {
3519               idx[count]     = j;
3520               cmap1[count++] = i; /* column index in submat */
3521             }
3522           }
3523         }
3524       }
3525       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3526 
3527       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3528       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3529       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3530 
3531       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3532     }
3533 
3534     /* (3) Create sequential Msub */
3535     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3536   }
3537 
3538   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3539   aij  = (Mat_SeqAIJ*)(Msub)->data;
3540   ii   = aij->i;
3541   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3542 
3543   /*
3544       m - number of local rows
3545       Ncols - number of columns (same on all processors)
3546       rstart - first row in new global matrix generated
3547   */
3548   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3549 
3550   if (call == MAT_INITIAL_MATRIX) {
3551     /* (4) Create parallel newmat */
3552     PetscMPIInt    rank,size;
3553     PetscInt       csize;
3554 
3555     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3556     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3557 
3558     /*
3559         Determine the number of non-zeros in the diagonal and off-diagonal
3560         portions of the matrix in order to do correct preallocation
3561     */
3562 
3563     /* first get start and end of "diagonal" columns */
3564     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3565     if (csize == PETSC_DECIDE) {
3566       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3567       if (mglobal == Ncols) { /* square matrix */
3568         nlocal = m;
3569       } else {
3570         nlocal = Ncols/size + ((Ncols % size) > rank);
3571       }
3572     } else {
3573       nlocal = csize;
3574     }
3575     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3576     rstart = rend - nlocal;
3577     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3578 
3579     /* next, compute all the lengths */
3580     jj    = aij->j;
3581     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3582     olens = dlens + m;
3583     for (i=0; i<m; i++) {
3584       jend = ii[i+1] - ii[i];
3585       olen = 0;
3586       dlen = 0;
3587       for (j=0; j<jend; j++) {
3588         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3589         else dlen++;
3590         jj++;
3591       }
3592       olens[i] = olen;
3593       dlens[i] = dlen;
3594     }
3595 
3596     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3597     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3598 
3599     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3600     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3601     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3602     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3603     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3604     ierr = PetscFree(dlens);CHKERRQ(ierr);
3605 
3606   } else { /* call == MAT_REUSE_MATRIX */
3607     M    = *newmat;
3608     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3609     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3610     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3611     /*
3612          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3613        rather than the slower MatSetValues().
3614     */
3615     M->was_assembled = PETSC_TRUE;
3616     M->assembled     = PETSC_FALSE;
3617   }
3618 
3619   /* (5) Set values of Msub to *newmat */
3620   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3621   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3622 
3623   jj   = aij->j;
3624   aa   = aij->a;
3625   for (i=0; i<m; i++) {
3626     row = rstart + i;
3627     nz  = ii[i+1] - ii[i];
3628     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3629     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3630     jj += nz; aa += nz;
3631   }
3632   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3633 
3634   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636 
3637   ierr = PetscFree(colsub);CHKERRQ(ierr);
3638 
3639   /* save Msub, iscol_sub and iscmap used in processor for next request */
3640   if (call ==  MAT_INITIAL_MATRIX) {
3641     *newmat = M;
3642     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3643     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3644 
3645     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3646     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3647 
3648     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3649     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3650 
3651     if (iscol_local) {
3652       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3653       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3654     }
3655   }
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 /*
3660     Not great since it makes two copies of the submatrix: first a SeqAIJ
3661   on each process, and then the end result by concatenating the local matrices.
3662   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3663 
3664   Note: This requires a sequential iscol with all indices.
3665 */
3666 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3667 {
3668   PetscErrorCode ierr;
3669   PetscMPIInt    rank,size;
3670   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3671   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3672   Mat            M,Mreuse;
3673   MatScalar      *aa,*vwork;
3674   MPI_Comm       comm;
3675   Mat_SeqAIJ     *aij;
3676   PetscBool      colflag,allcolumns=PETSC_FALSE;
3677 
3678   PetscFunctionBegin;
3679   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3680   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3681   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3682 
3683   /* Check for special case: each processor gets entire matrix columns */
3684   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3685   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3686   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3687 
3688   if (call ==  MAT_REUSE_MATRIX) {
3689     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3690     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3691     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3692   } else {
3693     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3694   }
3695 
3696   /*
3697       m - number of local rows
3698       n - number of columns (same on all processors)
3699       rstart - first row in new global matrix generated
3700   */
3701   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3702   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3703   if (call == MAT_INITIAL_MATRIX) {
3704     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3705     ii  = aij->i;
3706     jj  = aij->j;
3707 
3708     /*
3709         Determine the number of non-zeros in the diagonal and off-diagonal
3710         portions of the matrix in order to do correct preallocation
3711     */
3712 
3713     /* first get start and end of "diagonal" columns */
3714     if (csize == PETSC_DECIDE) {
3715       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3716       if (mglobal == n) { /* square matrix */
3717         nlocal = m;
3718       } else {
3719         nlocal = n/size + ((n % size) > rank);
3720       }
3721     } else {
3722       nlocal = csize;
3723     }
3724     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3725     rstart = rend - nlocal;
3726     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3727 
3728     /* next, compute all the lengths */
3729     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3730     olens = dlens + m;
3731     for (i=0; i<m; i++) {
3732       jend = ii[i+1] - ii[i];
3733       olen = 0;
3734       dlen = 0;
3735       for (j=0; j<jend; j++) {
3736         if (*jj < rstart || *jj >= rend) olen++;
3737         else dlen++;
3738         jj++;
3739       }
3740       olens[i] = olen;
3741       dlens[i] = dlen;
3742     }
3743     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3744     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3745     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3746     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3747     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3748     ierr = PetscFree(dlens);CHKERRQ(ierr);
3749   } else {
3750     PetscInt ml,nl;
3751 
3752     M    = *newmat;
3753     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3754     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3755     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3756     /*
3757          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3758        rather than the slower MatSetValues().
3759     */
3760     M->was_assembled = PETSC_TRUE;
3761     M->assembled     = PETSC_FALSE;
3762   }
3763   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3764   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3765   ii   = aij->i;
3766   jj   = aij->j;
3767   aa   = aij->a;
3768   for (i=0; i<m; i++) {
3769     row   = rstart + i;
3770     nz    = ii[i+1] - ii[i];
3771     cwork = jj;     jj += nz;
3772     vwork = aa;     aa += nz;
3773     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3774   }
3775 
3776   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3777   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3778   *newmat = M;
3779 
3780   /* save submatrix used in processor for next request */
3781   if (call ==  MAT_INITIAL_MATRIX) {
3782     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3783     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3784   }
3785   PetscFunctionReturn(0);
3786 }
3787 
3788 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3789 {
3790   PetscInt       m,cstart, cend,j,nnz,i,d;
3791   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3792   const PetscInt *JJ;
3793   PetscScalar    *values;
3794   PetscErrorCode ierr;
3795   PetscBool      nooffprocentries;
3796 
3797   PetscFunctionBegin;
3798   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3799 
3800   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3801   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3802   m      = B->rmap->n;
3803   cstart = B->cmap->rstart;
3804   cend   = B->cmap->rend;
3805   rstart = B->rmap->rstart;
3806 
3807   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3808 
3809 #if defined(PETSC_USE_DEBUG)
3810   for (i=0; i<m; i++) {
3811     nnz = Ii[i+1]- Ii[i];
3812     JJ  = J + Ii[i];
3813     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3814     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3815     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3816   }
3817 #endif
3818 
3819   for (i=0; i<m; i++) {
3820     nnz     = Ii[i+1]- Ii[i];
3821     JJ      = J + Ii[i];
3822     nnz_max = PetscMax(nnz_max,nnz);
3823     d       = 0;
3824     for (j=0; j<nnz; j++) {
3825       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3826     }
3827     d_nnz[i] = d;
3828     o_nnz[i] = nnz - d;
3829   }
3830   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3831   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3832 
3833   if (v) values = (PetscScalar*)v;
3834   else {
3835     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3836   }
3837 
3838   for (i=0; i<m; i++) {
3839     ii   = i + rstart;
3840     nnz  = Ii[i+1]- Ii[i];
3841     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3842   }
3843   nooffprocentries    = B->nooffprocentries;
3844   B->nooffprocentries = PETSC_TRUE;
3845   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   B->nooffprocentries = nooffprocentries;
3848 
3849   if (!v) {
3850     ierr = PetscFree(values);CHKERRQ(ierr);
3851   }
3852   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 /*@
3857    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3858    (the default parallel PETSc format).
3859 
3860    Collective on MPI_Comm
3861 
3862    Input Parameters:
3863 +  B - the matrix
3864 .  i - the indices into j for the start of each local row (starts with zero)
3865 .  j - the column indices for each local row (starts with zero)
3866 -  v - optional values in the matrix
3867 
3868    Level: developer
3869 
3870    Notes:
3871        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3872      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3873      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3874 
3875        The i and j indices are 0 based, and the i array holds, for each local row, the offset into the local j array.
3876 
3877        The format used for the sparse matrix input is equivalent to a
3878     row-major ordering, i.e. for the following matrix, the expected input data is
3879     as shown below:
3880 
3881 $        1 0 0
3882 $        2 0 3     P0
3883 $       -------
3884 $        4 5 6     P1
3885 $
3886 $     Process0 [P0]: rows_owned=[0,1]
3887 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3888 $        j =  {0,0,2}  [size = 3]
3889 $        v =  {1,2,3}  [size = 3]
3890 $
3891 $     Process1 [P1]: rows_owned=[2]
3892 $        i =  {0,3}    [size = nrow+1  = 1+1]
3893 $        j =  {0,1,2}  [size = 3]
3894 $        v =  {4,5,6}  [size = 3]
3895 
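       As an illustrative sketch only (error checking omitted, comm assumed to be already set up),
     the call on process 0 of the layout above could look like the following; process 1 would
     make the analogous call with its own i, j, and v arrays:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};        /* row offsets of the 2 local rows */
     PetscInt    j[] = {0,0,2};        /* global column indices           */
     PetscScalar v[] = {1.0,2.0,3.0};  /* the corresponding values        */

     MatCreate(comm,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);  /* preallocates, inserts the values, and assembles */
.ve
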
3896 .keywords: matrix, aij, compressed row, sparse, parallel
3897 
3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3899           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3900 @*/
3901 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3902 {
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3907   PetscFunctionReturn(0);
3908 }
3909 
3910 /*@C
3911    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3912    (the default parallel PETSc format).  For good matrix assembly performance
3913    the user should preallocate the matrix storage by setting the parameters
3914    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3915    performance can be increased by more than a factor of 50.
3916 
3917    Collective on MPI_Comm
3918 
3919    Input Parameters:
3920 +  B - the matrix
3921 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3922            (same value is used for all local rows)
3923 .  d_nnz - array containing the number of nonzeros in the various rows of the
3924            DIAGONAL portion of the local submatrix (possibly different for each row)
3925            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3926            The size of this array is equal to the number of local rows, i.e 'm'.
3927            For matrices that will be factored, you must leave room for (and set)
3928            the diagonal entry even if it is zero.
3929 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3930            submatrix (same value is used for all local rows).
3931 -  o_nnz - array containing the number of nonzeros in the various rows of the
3932            OFF-DIAGONAL portion of the local submatrix (possibly different for
3933            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3934            structure. The size of this array is equal to the number
3935            of local rows, i.e 'm'.
3936 
3937    If the *_nnz parameter is given then the *_nz parameter is ignored
3938 
3939    The AIJ format (also called the Yale sparse matrix format or
3940    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3941    storage.  The stored row and column indices begin with zero.
3942    See Users-Manual: ch_mat for details.
3943 
3944    The parallel matrix is partitioned such that the first m0 rows belong to
3945    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3946    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3947 
3948    The DIAGONAL portion of the local submatrix of a processor can be defined
3949    as the submatrix obtained by extracting the part corresponding to
3950    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3951    first row that belongs to the processor, r2 is the last row belonging to
3952    this processor, and c1-c2 is the range of indices of the local part of a
3953    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3954    common case of a square matrix, the row and column ranges are the same and
3955    the DIAGONAL part is also square. The remaining portion of the local
3956    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3957 
3958    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3959 
3960    You can call MatGetInfo() to get information on how effective the preallocation was;
3961    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3962    You can also run with the option -info and look for messages with the string
3963    malloc in them to see if additional memory allocation was needed.
3964 
3965    Example usage:
3966 
3967    Consider the following 8x8 matrix with 34 non-zero values, that is
3968    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3969    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3970    as follows:
3971 
3972 .vb
3973             1  2  0  |  0  3  0  |  0  4
3974     Proc0   0  5  6  |  7  0  0  |  8  0
3975             9  0 10  | 11  0  0  | 12  0
3976     -------------------------------------
3977            13  0 14  | 15 16 17  |  0  0
3978     Proc1   0 18  0  | 19 20 21  |  0  0
3979             0  0  0  | 22 23  0  | 24  0
3980     -------------------------------------
3981     Proc2  25 26 27  |  0  0 28  | 29  0
3982            30  0  0  | 31 32 33  |  0 34
3983 .ve
3984 
3985    This can be represented as a collection of submatrices as:
3986 
3987 .vb
3988       A B C
3989       D E F
3990       G H I
3991 .ve
3992 
3993    Where the submatrices A,B,C are owned by proc0, D,E,F are
3994    owned by proc1, G,H,I are owned by proc2.
3995 
3996    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3997    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3998    The 'M','N' parameters are 8,8, and have the same values on all procs.
3999 
4000    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4001    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4002    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4003    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4004    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4005    matrix, and [DF] as another SeqAIJ matrix.
4006 
4007    When d_nz, o_nz parameters are specified, d_nz storage elements are
4008    allocated for every row of the local diagonal submatrix, and o_nz
4009    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4010    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4011    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4012    In this case, the values of d_nz,o_nz are:
4013 .vb
4014      proc0 : dnz = 2, o_nz = 2
4015      proc1 : dnz = 3, o_nz = 2
4016      proc2 : dnz = 1, o_nz = 4
4017 .ve
4018    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4019    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4020    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4021    34 values.
4022 
4023    When d_nnz, o_nnz parameters are specified, the storage is specified
4024    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4025    In the above case the values for d_nnz,o_nnz are:
4026 .vb
4027      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4028      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4029      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4030 .ve
4031    Here the space allocated is the sum of all the above values, i.e. 34, and
4032    hence pre-allocation is perfect.
4033 
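   As an illustrative sketch only (error checking omitted, comm assumed to be already set up),
   the exact per-row preallocation above could be supplied on proc0 as follows; the other
   processes would pass their own local sizes and arrays:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
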
4034    Level: intermediate
4035 
4036 .keywords: matrix, aij, compressed row, sparse, parallel
4037 
4038 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4039           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4040 @*/
4041 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4042 {
4043   PetscErrorCode ierr;
4044 
4045   PetscFunctionBegin;
4046   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4047   PetscValidType(B,1);
4048   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4049   PetscFunctionReturn(0);
4050 }
4051 
4052 /*@
4053      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4054          rows in standard CSR format.
4055 
4056    Collective on MPI_Comm
4057 
4058    Input Parameters:
4059 +  comm - MPI communicator
4060 .  m - number of local rows (Cannot be PETSC_DECIDE)
4061 .  n - This value should be the same as the local size used in creating the
4062        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4063        calculated if N is given). For square matrices n is almost always m.
4064 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4065 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4066 .   i - row indices
4067 .   j - column indices
4068 -   a - matrix values
4069 
4070    Output Parameter:
4071 .   mat - the matrix
4072 
4073    Level: intermediate
4074 
4075    Notes:
4076        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4077      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4078      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4079 
4080        The i and j indices are 0 based, and the i array holds, for each local row, the offset into the local j array.
4081 
4082        The format used for the sparse matrix input is equivalent to a
4083     row-major ordering, i.e. for the following matrix, the expected input data is
4084     as shown below:
4085 
4086 $        1 0 0
4087 $        2 0 3     P0
4088 $       -------
4089 $        4 5 6     P1
4090 $
4091 $     Process0 [P0]: rows_owned=[0,1]
4092 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4093 $        j =  {0,0,2}  [size = 3]
4094 $        v =  {1,2,3}  [size = 3]
4095 $
4096 $     Process1 [P1]: rows_owned=[2]
4097 $        i =  {0,3}    [size = nrow+1  = 1+1]
4098 $        j =  {0,1,2}  [size = 3]
4099 $        v =  {4,5,6}  [size = 3]
4100 
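       As an illustrative sketch only (error checking omitted, comm assumed to be already set up),
     the corresponding call on process 0 of the layout above is shown below; process 1 would
     pass m=1 and its own arrays:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
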
4101 .keywords: matrix, aij, compressed row, sparse, parallel
4102 
4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4104           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4105 @*/
4106 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4107 {
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4112   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4113   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4114   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4115   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4116   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4117   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 /*@C
4122    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4123    (the default parallel PETSc format).  For good matrix assembly performance
4124    the user should preallocate the matrix storage by setting the parameters
4125    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4126    performance can be increased by more than a factor of 50.
4127 
4128    Collective on MPI_Comm
4129 
4130    Input Parameters:
4131 +  comm - MPI communicator
4132 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4133            This value should be the same as the local size used in creating the
4134            y vector for the matrix-vector product y = Ax.
4135 .  n - This value should be the same as the local size used in creating the
4136        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4137        calculated if N is given). For square matrices n is almost always m.
4138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4140 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4141            (same value is used for all local rows)
4142 .  d_nnz - array containing the number of nonzeros in the various rows of the
4143            DIAGONAL portion of the local submatrix (possibly different for each row)
4144            or NULL, if d_nz is used to specify the nonzero structure.
4145            The size of this array is equal to the number of local rows, i.e 'm'.
4146 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4147            submatrix (same value is used for all local rows).
4148 -  o_nnz - array containing the number of nonzeros in the various rows of the
4149            OFF-DIAGONAL portion of the local submatrix (possibly different for
4150            each row) or NULL, if o_nz is used to specify the nonzero
4151            structure. The size of this array is equal to the number
4152            of local rows, i.e 'm'.
4153 
4154    Output Parameter:
4155 .  A - the matrix
4156 
4157    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4158    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4159    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4160 
4161    Notes:
4162    If the *_nnz parameter is given then the *_nz parameter is ignored
4163 
4164    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4165    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4166    storage requirements for this matrix.
4167 
4168    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4169    processor then it must be used on all processors that share the object for
4170    that argument.
4171 
4172    The user MUST specify either the local or global matrix dimensions
4173    (possibly both).
4174 
4175    The parallel matrix is partitioned across processors such that the
4176    first m0 rows belong to process 0, the next m1 rows belong to
4177    process 1, the next m2 rows belong to process 2, etc., where
4178    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4179    values corresponding to an [m x N] submatrix.
4180 
4181    The columns are logically partitioned with the first n0 columns belonging
4182    to the 0th partition, the next n1 columns belonging to the next
4183    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4184 
4185    The DIAGONAL portion of the local submatrix on any given processor
4186    is the submatrix formed by the rows and columns m,n owned by
4187    the given processor, i.e. the diagonal submatrix on
4188    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4189    etc. The remaining portion of the local submatrix [m x (N-n)]
4190    constitutes the OFF-DIAGONAL portion. The example below better
4191    illustrates this concept.
4192 
4193    For a square global matrix we define each processor's diagonal portion
4194    to be its local rows and the corresponding columns (a square submatrix);
4195    each processor's off-diagonal portion encompasses the remainder of the
4196    local matrix (a rectangular submatrix).
4197 
4198    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4199 
4200    When calling this routine with a single process communicator, a matrix of
4201    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4202    type of communicator, use the construction mechanism
4203 .vb
4204      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4205 .ve
4206 
4207 $     MatCreate(...,&A);
4208 $     MatSetType(A,MATMPIAIJ);
4209 $     MatSetSizes(A, m,n,M,N);
4210 $     MatMPIAIJSetPreallocation(A,...);
4211 
4212    By default, this format uses inodes (identical nodes) when possible.
4213    We search for consecutive rows with the same nonzero structure, thereby
4214    reusing matrix information to achieve increased efficiency.
4215 
4216    Options Database Keys:
4217 +  -mat_no_inode  - Do not use inodes
4218 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4219 
4220 
4221 
4222    Example usage:
4223 
4224    Consider the following 8x8 matrix with 34 non-zero values, that is
4225    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4226    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4227    as follows
4228 
4229 .vb
4230             1  2  0  |  0  3  0  |  0  4
4231     Proc0   0  5  6  |  7  0  0  |  8  0
4232             9  0 10  | 11  0  0  | 12  0
4233     -------------------------------------
4234            13  0 14  | 15 16 17  |  0  0
4235     Proc1   0 18  0  | 19 20 21  |  0  0
4236             0  0  0  | 22 23  0  | 24  0
4237     -------------------------------------
4238     Proc2  25 26 27  |  0  0 28  | 29  0
4239            30  0  0  | 31 32 33  |  0 34
4240 .ve
4241 
4242    This can be represented as a collection of submatrices as
4243 
4244 .vb
4245       A B C
4246       D E F
4247       G H I
4248 .ve
4249 
4250    Where the submatrices A,B,C are owned by proc0, D,E,F are
4251    owned by proc1, G,H,I are owned by proc2.
4252 
4253    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4254    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4255    The 'M','N' parameters are 8,8, and have the same values on all procs.
4256 
4257    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4258    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4259    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4260    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4261    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4262    matrix, and [DF] as another SeqAIJ matrix.
4263 
4264    When d_nz, o_nz parameters are specified, d_nz storage elements are
4265    allocated for every row of the local diagonal submatrix, and o_nz
4266    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4267    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4268    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4269    In this case, the values of d_nz,o_nz are
4270 .vb
4271      proc0 : dnz = 2, o_nz = 2
4272      proc1 : dnz = 3, o_nz = 2
4273      proc2 : dnz = 1, o_nz = 4
4274 .ve
4275    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4276    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4277    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4278    34 values.
4279 
4280    When d_nnz, o_nnz parameters are specified, the storage is specified
4281    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4282    In the above case the values for d_nnz,o_nnz are
4283 .vb
4284      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4285      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4286      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4287 .ve
4288    Here the space allocated is the sum of all the above values, i.e. 34, and
4289    hence pre-allocation is perfect.
4290 
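   As an illustrative sketch only (error checking omitted, comm assumed to be already set up),
   the call on proc0 that uses the exact per-row counts above is shown below; proc1 and proc2
   would pass their own local sizes and arrays:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
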
4291    Level: intermediate
4292 
4293 .keywords: matrix, aij, compressed row, sparse, parallel
4294 
4295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4296           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4297 @*/
4298 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4299 {
4300   PetscErrorCode ierr;
4301   PetscMPIInt    size;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4305   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4306   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4307   if (size > 1) {
4308     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4309     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4310   } else {
4311     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4312     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4313   }
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4318 {
4319   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4320   PetscBool      flg;
4321   PetscErrorCode ierr;
4322 
4323   PetscFunctionBegin;
4324   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4325   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4326   if (Ad)     *Ad     = a->A;
4327   if (Ao)     *Ao     = a->B;
4328   if (colmap) *colmap = a->garray;
4329   PetscFunctionReturn(0);
4330 }
4331 
4332 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4333 {
4334   PetscErrorCode ierr;
4335   PetscInt       m,N,i,rstart,nnz,Ii;
4336   PetscInt       *indx;
4337   PetscScalar    *values;
4338 
4339   PetscFunctionBegin;
4340   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4341   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4342     PetscInt       *dnz,*onz,sum,bs,cbs;
4343 
4344     if (n == PETSC_DECIDE) {
4345       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4346     }
4347     /* Check sum(n) = N */
4348     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4349     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4350 
4351     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4352     rstart -= m;
4353 
4354     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4355     for (i=0; i<m; i++) {
4356       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4357       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4358       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4359     }
4360 
4361     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4362     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4363     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4364     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4365     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4366     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4367     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4368     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4369   }
4370 
4371   /* numeric phase */
4372   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4373   for (i=0; i<m; i++) {
4374     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4375     Ii   = i + rstart;
4376     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4377     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4378   }
4379   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4380   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4381   PetscFunctionReturn(0);
4382 }
4383 
4384 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4385 {
4386   PetscErrorCode    ierr;
4387   PetscMPIInt       rank;
4388   PetscInt          m,N,i,rstart,nnz;
4389   size_t            len;
4390   const PetscInt    *indx;
4391   PetscViewer       out;
4392   char              *name;
4393   Mat               B;
4394   const PetscScalar *values;
4395 
4396   PetscFunctionBegin;
4397   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4398   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4399   /* Should this be the type of the diagonal block of A? */
4400   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4401   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4402   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4403   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4404   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4405   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4406   for (i=0; i<m; i++) {
4407     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4408     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4409     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4410   }
4411   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4412   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4413 
4414   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4415   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4416   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4417   sprintf(name,"%s.%d",outfile,rank);
4418   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4419   ierr = PetscFree(name);CHKERRQ(ierr);
4420   ierr = MatView(B,out);CHKERRQ(ierr);
4421   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4422   ierr = MatDestroy(&B);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4427 {
4428   PetscErrorCode      ierr;
4429   Mat_Merge_SeqsToMPI *merge;
4430   PetscContainer      container;
4431 
4432   PetscFunctionBegin;
4433   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4434   if (container) {
4435     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4447     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4448     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4449     ierr = PetscFree(merge);CHKERRQ(ierr);
4450     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4451   }
4452   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4453   PetscFunctionReturn(0);
4454 }
4455 
4456 #include <../src/mat/utils/freespace.h>
4457 #include <petscbt.h>
4458 
4459 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4460 {
4461   PetscErrorCode      ierr;
4462   MPI_Comm            comm;
4463   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4464   PetscMPIInt         size,rank,taga,*len_s;
4465   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4466   PetscInt            proc,m;
4467   PetscInt            **buf_ri,**buf_rj;
4468   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4469   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4470   MPI_Request         *s_waits,*r_waits;
4471   MPI_Status          *status;
4472   MatScalar           *aa=a->a;
4473   MatScalar           **abuf_r,*ba_i;
4474   Mat_Merge_SeqsToMPI *merge;
4475   PetscContainer      container;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4479   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4480 
4481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4482   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4483 
4484   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4485   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4486 
4487   bi     = merge->bi;
4488   bj     = merge->bj;
4489   buf_ri = merge->buf_ri;
4490   buf_rj = merge->buf_rj;
4491 
4492   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4493   owners = merge->rowmap->range;
4494   len_s  = merge->len_s;
4495 
4496   /* send and recv matrix values */
4497   /*-----------------------------*/
4498   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4499   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4500 
4501   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4502   for (proc=0,k=0; proc<size; proc++) {
4503     if (!len_s[proc]) continue;
4504     i    = owners[proc];
4505     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4506     k++;
4507   }
4508 
4509   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4510   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4511   ierr = PetscFree(status);CHKERRQ(ierr);
4512 
4513   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4514   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4515 
4516   /* insert mat values of mpimat */
4517   /*----------------------------*/
4518   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4519   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4520 
4521   for (k=0; k<merge->nrecv; k++) {
4522     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4523     nrows       = *(buf_ri_k[k]);
4524     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4525     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4526   }
4527 
4528   /* set values of ba */
4529   m = merge->rowmap->n;
4530   for (i=0; i<m; i++) {
4531     arow = owners[rank] + i;
4532     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4533     bnzi = bi[i+1] - bi[i];
4534     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4535 
4536     /* add local non-zero vals of this proc's seqmat into ba */
4537     anzi   = ai[arow+1] - ai[arow];
4538     aj     = a->j + ai[arow];
4539     aa     = a->a + ai[arow];
4540     nextaj = 0;
4541     for (j=0; nextaj<anzi; j++) {
4542       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4543         ba_i[j] += aa[nextaj++];
4544       }
4545     }
4546 
4547     /* add received vals into ba */
4548     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4549       /* i-th row */
4550       if (i == *nextrow[k]) {
4551         anzi   = *(nextai[k]+1) - *nextai[k];
4552         aj     = buf_rj[k] + *(nextai[k]);
4553         aa     = abuf_r[k] + *(nextai[k]);
4554         nextaj = 0;
4555         for (j=0; nextaj<anzi; j++) {
4556           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4557             ba_i[j] += aa[nextaj++];
4558           }
4559         }
4560         nextrow[k]++; nextai[k]++;
4561       }
4562     }
4563     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4564   }
4565   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4566   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4567 
4568   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4569   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4570   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4571   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4572   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4573   PetscFunctionReturn(0);
4574 }
4575 
4576 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4577 {
4578   PetscErrorCode      ierr;
4579   Mat                 B_mpi;
4580   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4581   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4582   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4583   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4584   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4585   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4586   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4587   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4588   MPI_Status          *status;
4589   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4590   PetscBT             lnkbt;
4591   Mat_Merge_SeqsToMPI *merge;
4592   PetscContainer      container;
4593 
4594   PetscFunctionBegin;
4595   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4596 
4597   /* make sure it is a PETSc comm */
4598   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4599   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4600   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4601 
4602   ierr = PetscNew(&merge);CHKERRQ(ierr);
4603   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4604 
4605   /* determine row ownership */
4606   /*---------------------------------------------------------*/
4607   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4612   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4613   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4614 
4615   m      = merge->rowmap->n;
4616   owners = merge->rowmap->range;
4617 
4618   /* determine the number of messages to send, their lengths */
4619   /*---------------------------------------------------------*/
4620   len_s = merge->len_s;
4621 
4622   len          = 0; /* length of buf_si[] */
4623   merge->nsend = 0;
4624   for (proc=0; proc<size; proc++) {
4625     len_si[proc] = 0;
4626     if (proc == rank) {
4627       len_s[proc] = 0;
4628     } else {
4629       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4630       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4631     }
4632     if (len_s[proc]) {
4633       merge->nsend++;
4634       nrows = 0;
4635       for (i=owners[proc]; i<owners[proc+1]; i++) {
4636         if (ai[i+1] > ai[i]) nrows++;
4637       }
4638       len_si[proc] = 2*(nrows+1);
4639       len         += len_si[proc];
4640     }
4641   }
4642 
4643   /* determine the number and length of messages to receive for ij-structure */
4644   /*-------------------------------------------------------------------------*/
4645   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4646   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4647 
4648   /* post the Irecv of j-structure */
4649   /*-------------------------------*/
4650   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4651   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4652 
4653   /* post the Isend of j-structure */
4654   /*--------------------------------*/
4655   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4656 
4657   for (proc=0, k=0; proc<size; proc++) {
4658     if (!len_s[proc]) continue;
4659     i    = owners[proc];
4660     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4661     k++;
4662   }
4663 
4664   /* receives and sends of j-structure are complete */
4665   /*------------------------------------------------*/
4666   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4667   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4668 
4669   /* send and recv i-structure */
4670   /*---------------------------*/
4671   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4672   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4673 
4674   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4675   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4676   for (proc=0,k=0; proc<size; proc++) {
4677     if (!len_s[proc]) continue;
4678     /* form outgoing message for i-structure:
4679          buf_si[0]:                 nrows to be sent
4680                [1:nrows]:           row index (global)
4681                [nrows+1:2*nrows+1]: i-structure index
4682     */
4683     /*-------------------------------------------*/
4684     nrows       = len_si[proc]/2 - 1;
4685     buf_si_i    = buf_si + nrows+1;
4686     buf_si[0]   = nrows;
4687     buf_si_i[0] = 0;
4688     nrows       = 0;
4689     for (i=owners[proc]; i<owners[proc+1]; i++) {
4690       anzi = ai[i+1] - ai[i];
4691       if (anzi) {
4692         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4693         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4694         nrows++;
4695       }
4696     }
4697     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4698     k++;
4699     buf_si += len_si[proc];
4700   }
4701 
4702   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4703   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4704 
4705   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4706   for (i=0; i<merge->nrecv; i++) {
4707     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4708   }
4709 
4710   ierr = PetscFree(len_si);CHKERRQ(ierr);
4711   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4712   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4713   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4715   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4716   ierr = PetscFree(status);CHKERRQ(ierr);
4717 
4718   /* compute a local seq matrix in each processor */
4719   /*----------------------------------------------*/
4720   /* allocate bi array and free space for accumulating nonzero column info */
4721   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4722   bi[0] = 0;
4723 
4724   /* create and initialize a linked list */
4725   nlnk = N+1;
4726   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4727 
4728   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4729   len  = ai[owners[rank+1]] - ai[owners[rank]];
4730   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4731 
4732   current_space = free_space;
4733 
4734   /* determine symbolic info for each local row */
4735   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4736 
4737   for (k=0; k<merge->nrecv; k++) {
4738     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4739     nrows       = *buf_ri_k[k];
4740     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4741     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4742   }
4743 
4744   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4745   len  = 0;
4746   for (i=0; i<m; i++) {
4747     bnzi = 0;
4748     /* add local non-zero cols of this proc's seqmat into lnk */
4749     arow  = owners[rank] + i;
4750     anzi  = ai[arow+1] - ai[arow];
4751     aj    = a->j + ai[arow];
4752     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4753     bnzi += nlnk;
4754     /* add received col data into lnk */
4755     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4756       if (i == *nextrow[k]) { /* i-th row */
4757         anzi  = *(nextai[k]+1) - *nextai[k];
4758         aj    = buf_rj[k] + *nextai[k];
4759         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760         bnzi += nlnk;
4761         nextrow[k]++; nextai[k]++;
4762       }
4763     }
4764     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4765 
4766     /* if free space is not available, make more free space */
4767     if (current_space->local_remaining<bnzi) {
4768       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4769       nspacedouble++;
4770     }
4771     /* copy data into free space, then initialize lnk */
4772     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4773     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4774 
4775     current_space->array           += bnzi;
4776     current_space->local_used      += bnzi;
4777     current_space->local_remaining -= bnzi;
4778 
4779     bi[i+1] = bi[i] + bnzi;
4780   }
4781 
4782   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4783 
4784   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4785   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4786   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* create symbolic parallel matrix B_mpi */
4789   /*---------------------------------------*/
4790   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4791   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4792   if (n==PETSC_DECIDE) {
4793     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4794   } else {
4795     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4796   }
4797   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4798   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4799   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4800   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4801   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4802 
4803   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4804   B_mpi->assembled    = PETSC_FALSE;
4805   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4806   merge->bi           = bi;
4807   merge->bj           = bj;
4808   merge->buf_ri       = buf_ri;
4809   merge->buf_rj       = buf_rj;
4810   merge->coi          = NULL;
4811   merge->coj          = NULL;
4812   merge->owners_co    = NULL;
4813 
4814   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4815 
4816   /* attach the supporting struct to B_mpi for reuse */
4817   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4818   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4819   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4820   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4821   *mpimat = B_mpi;
4822 
4823   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826 
4827 /*@C
4828       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4829                  matrices from each processor
4830 
4831     Collective on MPI_Comm
4832 
4833    Input Parameters:
4834 +    comm - the communicator the parallel matrix will live on
4835 .    seqmat - the input sequential matrix on each processor
4836 .    m - number of local rows (or PETSC_DECIDE)
4837 .    n - number of local columns (or PETSC_DECIDE)
4838 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4839 
4840    Output Parameter:
4841 .    mpimat - the parallel matrix generated
4842 
4843     Level: advanced
4844 
4845    Notes:
4846      The dimensions of the sequential matrix in each processor MUST be the same.
4847      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4848      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
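
     An illustrative usage sketch (error checking omitted):
.vb
     Mat mpimat;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* if the values of seqmat change but its nonzero pattern does not, the result can be reused */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve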
4849 @*/
4850 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4851 {
4852   PetscErrorCode ierr;
4853   PetscMPIInt    size;
4854 
4855   PetscFunctionBegin;
4856   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4857   if (size == 1) {
4858     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4859     if (scall == MAT_INITIAL_MATRIX) {
4860       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4861     } else {
4862       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4863     }
4864     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4865     PetscFunctionReturn(0);
4866   }
4867   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4868   if (scall == MAT_INITIAL_MATRIX) {
4869     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4870   }
4871   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4872   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4873   PetscFunctionReturn(0);
4874 }
4875 
4876 /*@
4877      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4878           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4879           with MatGetSize().
4880 
4881     Not Collective
4882 
4883    Input Parameters:
4884 +    A - the matrix
4885 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4886 
4887    Output Parameter:
4888 .    A_loc - the local sequential matrix generated
4889 
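   Notes:
     An illustrative usage sketch (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(&A_loc);
.ve
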
4890     Level: developer
4891 
4892 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4893 
4894 @*/
4895 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4896 {
4897   PetscErrorCode ierr;
4898   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4899   Mat_SeqAIJ     *mat,*a,*b;
4900   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4901   MatScalar      *aa,*ba,*cam;
4902   PetscScalar    *ca;
4903   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4904   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4905   PetscBool      match;
4906   MPI_Comm       comm;
4907   PetscMPIInt    size;
4908 
4909   PetscFunctionBegin;
4910   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4911   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4912   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4914   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4915 
4916   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4917   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4918   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4919   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4920   aa = a->a; ba = b->a;
4921   if (scall == MAT_INITIAL_MATRIX) {
4922     if (size == 1) {
4923       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4924       PetscFunctionReturn(0);
4925     }
4926 
4927     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4928     ci[0] = 0;
4929     for (i=0; i<am; i++) {
4930       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4931     }
4932     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4933     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4934     k    = 0;
4935     for (i=0; i<am; i++) {
4936       ncols_o = bi[i+1] - bi[i];
4937       ncols_d = ai[i+1] - ai[i];
4938       /* off-diagonal portion of A */
4939       for (jo=0; jo<ncols_o; jo++) {
4940         col = cmap[*bj];
4941         if (col >= cstart) break;
4942         cj[k]   = col; bj++;
4943         ca[k++] = *ba++;
4944       }
4945       /* diagonal portion of A */
4946       for (j=0; j<ncols_d; j++) {
4947         cj[k]   = cstart + *aj++;
4948         ca[k++] = *aa++;
4949       }
4950       /* off-diagonal portion of A */
4951       for (j=jo; j<ncols_o; j++) {
4952         cj[k]   = cmap[*bj++];
4953         ca[k++] = *ba++;
4954       }
4955     }
4956     /* put together the new matrix */
4957     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4958     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4959     /* Since these are PETSc arrays, change flags to free them as necessary. */
4960     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4961     mat->free_a  = PETSC_TRUE;
4962     mat->free_ij = PETSC_TRUE;
4963     mat->nonew   = 0;
4964   } else if (scall == MAT_REUSE_MATRIX) {
4965     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4966     ci = mat->i; cj = mat->j; cam = mat->a;
4967     for (i=0; i<am; i++) {
4968       /* off-diagonal portion of A */
4969       ncols_o = bi[i+1] - bi[i];
4970       for (jo=0; jo<ncols_o; jo++) {
4971         col = cmap[*bj];
4972         if (col >= cstart) break;
4973         *cam++ = *ba++; bj++;
4974       }
4975       /* diagonal portion of A */
4976       ncols_d = ai[i+1] - ai[i];
4977       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4978       /* off-diagonal portion of A */
4979       for (j=jo; j<ncols_o; j++) {
4980         *cam++ = *ba++; bj++;
4981       }
4982     }
4983   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4984   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 /*@C
4989      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4990 
4991     Not Collective
4992 
4993    Input Parameters:
4994 +    A - the matrix
4995 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4996 -    row, col - index sets of rows and columns to extract (or NULL)
4997 
4998    Output Parameter:
4999 .    A_loc - the local sequential matrix generated
5000 
5001     Level: developer
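   Notes:
     An illustrative usage sketch that extracts all local rows with the default column
     selection (error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
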
5002 
5003 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5004 
5005 @*/
5006 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5007 {
5008   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5009   PetscErrorCode ierr;
5010   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5011   IS             isrowa,iscola;
5012   Mat            *aloc;
5013   PetscBool      match;
5014 
5015   PetscFunctionBegin;
5016   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5017   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5018   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5019   if (!row) {
5020     start = A->rmap->rstart; end = A->rmap->rend;
5021     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5022   } else {
5023     isrowa = *row;
5024   }
5025   if (!col) {
5026     start = A->cmap->rstart;
5027     cmap  = a->garray;
5028     nzA   = a->A->cmap->n;
5029     nzB   = a->B->cmap->n;
5030     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5031     ncols = 0;
5032     for (i=0; i<nzB; i++) {
5033       if (cmap[i] < start) idx[ncols++] = cmap[i];
5034       else break;
5035     }
5036     imark = i;
5037     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5038     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5039     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5040   } else {
5041     iscola = *col;
5042   }
5043   if (scall != MAT_INITIAL_MATRIX) {
5044     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5045     aloc[0] = *A_loc;
5046   }
5047   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5048   *A_loc = aloc[0];
5049   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5050   if (!row) {
5051     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5052   }
5053   if (!col) {
5054     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5055   }
5056   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5057   PetscFunctionReturn(0);
5058 }
5059 
5060 /*@C
5061     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5062 
5063     Collective on Mat
5064 
5065    Input Parameters:
5066 +    A,B - the matrices in mpiaij format
5067 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5068 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5069 
5070    Output Parameter:
5071 +    rowb, colb - index sets of rows and columns of B to extract
5072 -    B_seq - the sequential matrix generated
5073 
5074     Level: developer
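   Notes:
     An illustrative usage sketch that lets the routine build the row and column index
     sets itself (error checking omitted):
.vb
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,NULL,NULL,&B_seq);
.ve
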
5075 
5076 @*/
5077 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5078 {
5079   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5080   PetscErrorCode ierr;
5081   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5082   IS             isrowb,iscolb;
5083   Mat            *bseq=NULL;
5084 
5085   PetscFunctionBegin;
5086   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5087     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5088   }
5089   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5090 
5091   if (scall == MAT_INITIAL_MATRIX) {
5092     start = A->cmap->rstart;
5093     cmap  = a->garray;
5094     nzA   = a->A->cmap->n;
5095     nzB   = a->B->cmap->n;
5096     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5097     ncols = 0;
5098     for (i=0; i<nzB; i++) {  /* row < local row index */
5099       if (cmap[i] < start) idx[ncols++] = cmap[i];
5100       else break;
5101     }
5102     imark = i;
5103     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5104     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5105     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5106     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5107   } else {
5108     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5109     isrowb  = *rowb; iscolb = *colb;
5110     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5111     bseq[0] = *B_seq;
5112   }
5113   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5114   *B_seq = bseq[0];
5115   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5116   if (!rowb) {
5117     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5118   } else {
5119     *rowb = isrowb;
5120   }
5121   if (!colb) {
5122     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5123   } else {
5124     *colb = iscolb;
5125   }
5126   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5127   PetscFunctionReturn(0);
5128 }
5129 
5130 /*
5131     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5132     of the OFF-DIAGONAL portion of local A
5133 
5134     Collective on Mat
5135 
5136    Input Parameters:
5137 +    A,B - the matrices in mpiaij format
5138 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5139 
5140    Output Parameter:
5141 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5142 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5143 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5144 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
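
   Example usage, a minimal sketch of how this internal routine is typically driven (variable names are
   illustrative); the MAT_REUSE_MATRIX call refreshes B_oth after B has changed numerically but not structurally:

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);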
5145 
5146     Level: developer
5147 
5148 */
5149 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5150 {
5151   VecScatter_MPI_General *gen_to,*gen_from;
5152   PetscErrorCode         ierr;
5153   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5154   Mat_SeqAIJ             *b_oth;
5155   VecScatter             ctx;
5156   MPI_Comm               comm;
5157   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5158   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5159   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5160   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5161   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5162   MPI_Request            *rwaits = NULL,*swaits = NULL;
5163   MPI_Status             *sstatus,rstatus;
5164   PetscMPIInt            jj,size;
5165   VecScatterType         type;
5166   PetscBool              mpi1;
5167 
5168   PetscFunctionBegin;
5169   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5170   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5171 
5172   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5173     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5174   }
5175   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5177 
5178   if (size == 1) {
5179     if (startsj_s) *startsj_s = NULL;
5180     if (bufa_ptr)  *bufa_ptr  = NULL;
5181     *B_oth = NULL;
5182     PetscFunctionReturn(0);
5183   }
5184 
5185   ctx = a->Mvctx;
5186   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5187   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5188   if (!mpi1) {
5189     /* a->Mvctx is not of type MPI1, the only VecScatter type supported by the Mat-Mat operations below,
5190        so create a->Mvctx_mpi1 */
5191     if (!a->Mvctx_mpi1) {
5192       a->Mvctx_mpi1_flg = PETSC_TRUE;
5193       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5194     }
5195     ctx = a->Mvctx_mpi1;
5196   }
5197   tag = ((PetscObject)ctx)->tag;
5198 
5199   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5200   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5201   nrecvs   = gen_from->n;
5202   nsends   = gen_to->n;
5203 
5204   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5205   srow    = gen_to->indices;    /* local row index to be sent */
5206   sstarts = gen_to->starts;
5207   sprocs  = gen_to->procs;
5208   sstatus = gen_to->sstatus;
5209   sbs     = gen_to->bs;
5210   rstarts = gen_from->starts;
5211   rprocs  = gen_from->procs;
5212   rbs     = gen_from->bs;
5213 
5214   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5215   if (scall == MAT_INITIAL_MATRIX) {
5216     /* i-array */
5217     /*---------*/
5218     /*  post receives */
5219     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5220     for (i=0; i<nrecvs; i++) {
5221       rowlen = rvalues + rstarts[i]*rbs;
5222       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5223       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5224     }
5225 
5226     /* pack the outgoing message */
5227     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5228 
5229     sstartsj[0] = 0;
5230     rstartsj[0] = 0;
5231     len         = 0; /* total length of j or a array to be sent */
5232     k           = 0;
5233     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5234     for (i=0; i<nsends; i++) {
5235       rowlen = svalues + sstarts[i]*sbs;
5236       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5237       for (j=0; j<nrows; j++) {
5238         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5239         for (l=0; l<sbs; l++) {
5240           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5241 
5242           rowlen[j*sbs+l] = ncols;
5243 
5244           len += ncols;
5245           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5246         }
5247         k++;
5248       }
5249       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5250 
5251       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5252     }
5253     /* recvs and sends of i-array are completed */
5254     i = nrecvs;
5255     while (i--) {
5256       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5257     }
5258     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5259     ierr = PetscFree(svalues);CHKERRQ(ierr);
5260 
5261     /* allocate buffers for sending j and a arrays */
5262     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5263     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5264 
5265     /* create i-array of B_oth */
5266     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5267 
5268     b_othi[0] = 0;
5269     len       = 0; /* total length of j or a array to be received */
5270     k         = 0;
5271     for (i=0; i<nrecvs; i++) {
5272       rowlen = rvalues + rstarts[i]*rbs;
5273       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5274       for (j=0; j<nrows; j++) {
5275         b_othi[k+1] = b_othi[k] + rowlen[j];
5276         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5277         k++;
5278       }
5279       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5280     }
5281     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5282 
5283     /* allocate space for j and a arrays of B_oth */
5284     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5285     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5286 
5287     /* j-array */
5288     /*---------*/
5289     /*  post receives of j-array */
5290     for (i=0; i<nrecvs; i++) {
5291       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5292       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5293     }
5294 
5295     /* pack the outgoing message j-array */
5296     k = 0;
5297     for (i=0; i<nsends; i++) {
5298       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5299       bufJ  = bufj+sstartsj[i];
5300       for (j=0; j<nrows; j++) {
5301         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5302         for (ll=0; ll<sbs; ll++) {
5303           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5304           for (l=0; l<ncols; l++) {
5305             *bufJ++ = cols[l];
5306           }
5307           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5308         }
5309       }
5310       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5311     }
5312 
5313     /* recvs and sends of j-array are completed */
5314     i = nrecvs;
5315     while (i--) {
5316       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5317     }
5318     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5319   } else if (scall == MAT_REUSE_MATRIX) {
5320     sstartsj = *startsj_s;
5321     rstartsj = *startsj_r;
5322     bufa     = *bufa_ptr;
5323     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5324     b_otha   = b_oth->a;
5325   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5326 
5327   /* a-array */
5328   /*---------*/
5329   /*  post receives of a-array */
5330   for (i=0; i<nrecvs; i++) {
5331     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5332     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5333   }
5334 
5335   /* pack the outgoing message a-array */
5336   k = 0;
5337   for (i=0; i<nsends; i++) {
5338     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5339     bufA  = bufa+sstartsj[i];
5340     for (j=0; j<nrows; j++) {
5341       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5342       for (ll=0; ll<sbs; ll++) {
5343         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5344         for (l=0; l<ncols; l++) {
5345           *bufA++ = vals[l];
5346         }
5347         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5348       }
5349     }
5350     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5351   }
5352   /* recvs and sends of a-array are completed */
5353   i = nrecvs;
5354   while (i--) {
5355     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5356   }
5357   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5358   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5359 
5360   if (scall == MAT_INITIAL_MATRIX) {
5361     /* put together the new matrix */
5362     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5363 
5364     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5365     /* Since these are PETSc arrays, change flags to free them as necessary. */
5366     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5367     b_oth->free_a  = PETSC_TRUE;
5368     b_oth->free_ij = PETSC_TRUE;
5369     b_oth->nonew   = 0;
5370 
5371     ierr = PetscFree(bufj);CHKERRQ(ierr);
5372     if (!startsj_s || !bufa_ptr) {
5373       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5374       ierr = PetscFree(bufa);CHKERRQ(ierr);
5375     } else {
5376       *startsj_s = sstartsj;
5377       *startsj_r = rstartsj;
5378       *bufa_ptr  = bufa;
5379     }
5380   }
5381   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5382   PetscFunctionReturn(0);
5383 }
5384 
5385 /*@C
5386   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5387 
5388   Not Collective
5389 
5390   Input Parameter:
5391 . A - The matrix in mpiaij format
5392 
5393   Output Parameters:
5394 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5395 . colmap - A map from global column index to local index into lvec
5396 - multScatter - A scatter from the argument of a matrix-vector product to lvec
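
  Example usage, a minimal sketch assuming A is an assembled MATMPIAIJ matrix and PETSc was configured with
  PETSC_USE_CTABLE (otherwise colmap is a PetscInt array); the returned objects are owned by A and must not
  be destroyed by the caller:
.vb
   Vec        lvec;
   PetscTable colmap;
   VecScatter Mvctx;
   ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve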
5397 
5398   Level: developer
5399 
5400 @*/
5401 #if defined(PETSC_USE_CTABLE)
5402 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5403 #else
5404 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5405 #endif
5406 {
5407   Mat_MPIAIJ *a;
5408 
5409   PetscFunctionBegin;
5410   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5411   PetscValidPointer(lvec, 2);
5412   PetscValidPointer(colmap, 3);
5413   PetscValidPointer(multScatter, 4);
5414   a = (Mat_MPIAIJ*) A->data;
5415   if (lvec) *lvec = a->lvec;
5416   if (colmap) *colmap = a->colmap;
5417   if (multScatter) *multScatter = a->Mvctx;
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5423 #if defined(PETSC_HAVE_MKL_SPARSE)
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5425 #endif
5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5427 #if defined(PETSC_HAVE_ELEMENTAL)
5428 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5429 #endif
5430 #if defined(PETSC_HAVE_HYPRE)
5431 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5432 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5433 #endif
5434 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5435 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5436 
5437 /*
5438     Computes C = A*B as (B'*A')', since computing A*B directly with a dense A is untenable
5439 
5440                n                       p                          p
5441         (              )       (              )         (                  )
5442       m (      A       )  *  n (       B      )   =   m (         C        )
5443         (              )       (              )         (                  )
5444 
5445 */
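/*
   A user-level sketch of the operation the kernel below backs (assuming A is MATMPIDENSE and B is MATMPIAIJ;
   the MatMatMult() call is expected to dispatch to the MPIDense-MPIAIJ routines registered in this file):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/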
5446 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5447 {
5448   PetscErrorCode ierr;
5449   Mat            At,Bt,Ct;
5450 
5451   PetscFunctionBegin;
5452   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5453   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5454   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5455   ierr = MatDestroy(&At);CHKERRQ(ierr);
5456   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5457   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5458   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5459   PetscFunctionReturn(0);
5460 }
5461 
5462 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5463 {
5464   PetscErrorCode ierr;
5465   PetscInt       m=A->rmap->n,n=B->cmap->n;
5466   Mat            Cmat;
5467 
5468   PetscFunctionBegin;
5469   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5470   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5471   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5472   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5473   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5474   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5475   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5476   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5477 
5478   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5479 
5480   *C = Cmat;
5481   PetscFunctionReturn(0);
5482 }
5483 
5484 /* ----------------------------------------------------------------*/
5485 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5486 {
5487   PetscErrorCode ierr;
5488 
5489   PetscFunctionBegin;
5490   if (scall == MAT_INITIAL_MATRIX) {
5491     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5492     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5493     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5494   }
5495   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5496   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5497   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5498   PetscFunctionReturn(0);
5499 }
5500 
5501 /*MC
5502    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5503 
5504    Options Database Keys:
5505 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
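
   Example usage, a minimal sketch of creating and preallocating a matrix of this type (the sizes and
   preallocation numbers are illustrative); values are then set with MatSetValues() followed by
   MatAssemblyBegin()/MatAssemblyEnd():
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve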
5506 
5507   Level: beginner
5508 
5509 .seealso: MatCreateAIJ()
5510 M*/
5511 
5512 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5513 {
5514   Mat_MPIAIJ     *b;
5515   PetscErrorCode ierr;
5516   PetscMPIInt    size;
5517 
5518   PetscFunctionBegin;
5519   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5520 
5521   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5522   B->data       = (void*)b;
5523   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5524   B->assembled  = PETSC_FALSE;
5525   B->insertmode = NOT_SET_VALUES;
5526   b->size       = size;
5527 
5528   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5529 
5530   /* build cache for off array entries formed */
5531   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5532 
5533   b->donotstash  = PETSC_FALSE;
5534   b->colmap      = NULL;
5535   b->garray      = NULL;
5536   b->roworiented = PETSC_TRUE;
5537 
5538   /* stuff used for matrix vector multiply */
5539   b->lvec  = NULL;
5540   b->Mvctx = NULL;
5541 
5542   /* stuff for MatGetRow() */
5543   b->rowindices   = NULL;
5544   b->rowvalues    = NULL;
5545   b->getrowactive = PETSC_FALSE;
5546 
5547   /* flexible pointer used in CUSP/CUSPARSE classes */
5548   b->spptr = NULL;
5549 
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5559 #if defined(PETSC_HAVE_MKL_SPARSE)
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5561 #endif
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5564 #if defined(PETSC_HAVE_ELEMENTAL)
5565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5566 #endif
5567 #if defined(PETSC_HAVE_HYPRE)
5568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5569 #endif
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5571   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5573   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5575 #if defined(PETSC_HAVE_HYPRE)
5576   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5577 #endif
5578   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5579   PetscFunctionReturn(0);
5580 }
5581 
5582 /*@C
5583      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5584          and "off-diagonal" part of the matrix in CSR format.
5585 
5586    Collective on MPI_Comm
5587 
5588    Input Parameters:
5589 +  comm - MPI communicator
5590 .  m - number of local rows (Cannot be PETSC_DECIDE)
5591 .  n - This value should be the same as the local size used in creating the
5592        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5593        calculated if N is given). For square matrices n is almost always m.
5594 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5595 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5596 .   i - row indices for "diagonal" portion of matrix
5597 .   j - column indices
5598 .   a - matrix values
5599 .   oi - row indices for "off-diagonal" portion of matrix
5600 .   oj - column indices
5601 -   oa - matrix values
5602 
5603    Output Parameter:
5604 .   mat - the matrix
5605 
5606    Level: advanced
5607 
5608    Notes:
5609        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5610        must free the arrays once the matrix has been destroyed and not before.
5611 
5612        The i and j indices are 0 based
5613 
5614        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5615 
5616        This sets local rows and cannot be used to set off-processor values.
5617 
5618        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5619        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5620        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5621        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5622        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5623        communication if it is known that only local entries will be set.
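
       Example usage, a minimal sketch for exactly 2 MPI processes, each owning one row of the 2x2 matrix with 2 on
       the diagonal and -1 off the diagonal (the arrays and values are illustrative only); in this sketch the j
       indices of the "diagonal" block are local to that block while the oj indices are global, following how the
       two blocks are created below, and the arrays must stay valid until the matrix is destroyed:
.vb
   PetscInt    i[2]  = {0,1}, j[1]  = {0};
   PetscInt    oi[2] = {0,1}, oj[1];
   PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
   PetscMPIInt rank;
   Mat         A;

   ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   oj[0] = rank ? 0 : 1;
   ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   ierr  = MatDestroy(&A);CHKERRQ(ierr);
.ve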
5624 
5625 .keywords: matrix, aij, compressed row, sparse, parallel
5626 
5627 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5628           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5629 @*/
5630 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5631 {
5632   PetscErrorCode ierr;
5633   Mat_MPIAIJ     *maij;
5634 
5635   PetscFunctionBegin;
5636   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5637   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5638   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5639   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5640   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5641   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5642   maij = (Mat_MPIAIJ*) (*mat)->data;
5643 
5644   (*mat)->preallocated = PETSC_TRUE;
5645 
5646   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5647   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5648 
5649   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5650   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5651 
5652   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5653   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5654   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5655   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5656 
5657   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5658   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5660   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5661   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5662   PetscFunctionReturn(0);
5663 }
5664 
5665 /*
5666     Special version for direct calls from Fortran
5667 */
5668 #include <petsc/private/fortranimpl.h>
5669 
5670 /* Change these macros so can be used in void function */
5671 #undef CHKERRQ
5672 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5673 #undef SETERRQ2
5674 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5675 #undef SETERRQ3
5676 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5677 #undef SETERRQ
5678 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5679 
5680 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5681 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5682 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5683 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5684 #else
5685 #endif
5686 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5687 {
5688   Mat            mat  = *mmat;
5689   PetscInt       m    = *mm, n = *mn;
5690   InsertMode     addv = *maddv;
5691   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5692   PetscScalar    value;
5693   PetscErrorCode ierr;
5694 
5695   MatCheckPreallocated(mat,1);
5696   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5697 
5698 #if defined(PETSC_USE_DEBUG)
5699   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5700 #endif
5701   {
5702     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5703     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5704     PetscBool roworiented = aij->roworiented;
5705 
5706     /* Some Variables required in the macro */
5707     Mat        A                 = aij->A;
5708     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5709     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5710     MatScalar  *aa               = a->a;
5711     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5712     Mat        B                 = aij->B;
5713     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5714     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5715     MatScalar  *ba               = b->a;
5716 
5717     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5718     PetscInt  nonew = a->nonew;
5719     MatScalar *ap1,*ap2;
5720 
5721     PetscFunctionBegin;
5722     for (i=0; i<m; i++) {
5723       if (im[i] < 0) continue;
5724 #if defined(PETSC_USE_DEBUG)
5725       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5726 #endif
5727       if (im[i] >= rstart && im[i] < rend) {
5728         row      = im[i] - rstart;
5729         lastcol1 = -1;
5730         rp1      = aj + ai[row];
5731         ap1      = aa + ai[row];
5732         rmax1    = aimax[row];
5733         nrow1    = ailen[row];
5734         low1     = 0;
5735         high1    = nrow1;
5736         lastcol2 = -1;
5737         rp2      = bj + bi[row];
5738         ap2      = ba + bi[row];
5739         rmax2    = bimax[row];
5740         nrow2    = bilen[row];
5741         low2     = 0;
5742         high2    = nrow2;
5743 
5744         for (j=0; j<n; j++) {
5745           if (roworiented) value = v[i*n+j];
5746           else value = v[i+j*m];
5747           if (in[j] >= cstart && in[j] < cend) {
5748             col = in[j] - cstart;
5749             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5750             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5751           } else if (in[j] < 0) continue;
5752 #if defined(PETSC_USE_DEBUG)
5753           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5754           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5755 #endif
5756           else {
5757             if (mat->was_assembled) {
5758               if (!aij->colmap) {
5759                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5760               }
5761 #if defined(PETSC_USE_CTABLE)
5762               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5763               col--;
5764 #else
5765               col = aij->colmap[in[j]] - 1;
5766 #endif
5767               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5768               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5769                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5770                 col  =  in[j];
5771                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5772                 B     = aij->B;
5773                 b     = (Mat_SeqAIJ*)B->data;
5774                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5775                 rp2   = bj + bi[row];
5776                 ap2   = ba + bi[row];
5777                 rmax2 = bimax[row];
5778                 nrow2 = bilen[row];
5779                 low2  = 0;
5780                 high2 = nrow2;
5781                 bm    = aij->B->rmap->n;
5782                 ba    = b->a;
5783               }
5784             } else col = in[j];
5785             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5786           }
5787         }
5788       } else if (!aij->donotstash) {
5789         if (roworiented) {
5790           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5791         } else {
5792           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5793         }
5794       }
5795     }
5796   }
5797   PetscFunctionReturnVoid();
5798 }
5799 
5800