xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dc424cfae46ffd055acbb99fbc61fc85fc92f9ad)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The AIJ type also
22    automatically switches over to use inodes when enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
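
/*
   A minimal usage sketch of the recommendation above, assuming a 100x100 system
   and placeholder nonzero estimates (5 per row in the diagonal block, 2 in the
   off-diagonal block): calling both preallocation routines lets the same code
   run on one or many processes; the routine that does not match the
   communicator size is simply ignored.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/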
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
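
/*
   Sketch of how the -mat_type aijcrl key above takes effect, via
   MatSetFromOptions(); the sizes here are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);    (run with -mat_type aijcrl)

   followed by the usual preallocation and assembly calls.
*/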
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
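
/*
   Caller-side sketch for the routine above: the public MatFindNonzeroRows()
   dispatches here for MPIAIJ matrices. Note that *keptrows is left NULL when
   no process has an all-zero row.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/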
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
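
/*
   Caller-side sketch for the routine above: each process passes an array with
   room for every global column; the Allreduce makes the result identical on
   all ranks.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/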
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal nonzeros in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal nonzeros in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
402 a slightly higher hash table lookup cost; without it, it is not scalable (each process
403 stores an order-N integer array), but access is fast.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
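
/*
   How the colmap built above is queried elsewhere in this file: entries are
   stored shifted by one so that a zero lookup result can mean "not present".

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                  (lcol < 0 means gcol is not a ghost column here)
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   where gcol is a global column index and lcol the resulting local index into
   the off-diagonal block B.
*/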
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
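
/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above: the input array v holds an
   entire local row ordered by global column index, so with l entries to the
   left of the diagonal block it splits as

     v = [ l values for B left of the diagonal block
         | a->i[row+1]-a->i[row] values for A
         | remaining values for B right of the diagonal block ]
*/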
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
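
/*
   Caller-side sketch: values are addressed by global row/column indices.
   Locally owned rows are inserted directly into aij->A/aij->B above, while
   rows owned by other processes are stashed until assembly. The indices and
   values here are placeholders.

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/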
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled; if so we must
723      also disassemble ourselves, so that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
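
/*
   Sketch of the off-process path that the loop above drains: an entry set into
   a row owned by another rank sits in mat->stash until assembly, is shipped in
   MatAssemblyBegin_MPIAIJ(), and is applied here via MatSetValues_MPIAIJ().
   M denotes the global row count; rank, row, and col are placeholders.

     if (!rank) {
       PetscInt    row = M-1, col = M-1;
       PetscScalar one = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/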
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
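
/*
   Caller-side sketch for the routine above: zero two global rows, place 1.0 on
   their diagonal entries, and adjust the right-hand side b so that a known
   solution x is preserved. The row numbers are placeholders.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/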
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
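
/*
   What MatMult_MPIAIJ() above computes: with A_d the diagonal block and A_o
   the off-diagonal block,

     y = A_d * x_local + A_o * x_ghost

   where x_ghost (a->lvec) is filled by the VecScatter; the scatter is started
   before the local product so communication overlaps with computation.
*/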
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* the values were actually received in the Begin(), but we still need to call this no-op */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1116   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1117   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1118 
1119   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1127 #if defined(PETSC_HAVE_ELEMENTAL)
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1129 #endif
1130 #if defined(PETSC_HAVE_HYPRE)
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1133 #endif
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1138 {
1139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1140   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1141   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1142   PetscErrorCode ierr;
1143   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1144   int            fd;
1145   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1146   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1147   PetscScalar    *column_values;
1148   PetscInt       message_count,flowcontrolcount;
1149   FILE           *file;
1150 
1151   PetscFunctionBegin;
1152   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1153   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1154   nz   = A->nz + B->nz;
1155   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1156   if (!rank) {
1157     header[0] = MAT_FILE_CLASSID;
1158     header[1] = mat->rmap->N;
1159     header[2] = mat->cmap->N;
1160 
1161     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1162     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1163     /* get largest number of rows any processor has */
1164     rlen  = mat->rmap->n;
1165     range = mat->rmap->range;
1166     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1167   } else {
1168     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1169     rlen = mat->rmap->n;
1170   }
1171 
1172   /* load up the local row counts */
1173   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1174   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     for (i=1; i<size; i++) {
1181       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1182       rlen = range[i+1] - range[i];
1183       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     }
1186     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1187   } else {
1188     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1189     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1191   }
1192   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1193 
1194   /* load up the local column indices */
1195   nzmax = nz; /* this processor needs as much space as the largest number of nonzeros on any processor */
1196   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1198   cnt   = 0;
1199   for (i=0; i<mat->rmap->n; i++) {
1200     for (j=B->i[i]; j<B->i[i+1]; j++) {
1201       if ((col = garray[B->j[j]]) > cstart) break;
1202       column_indices[cnt++] = col;
1203     }
1204     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1205     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1206   }
1207   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1208 
1209   /* store the column indices to the file */
1210   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1211   if (!rank) {
1212     MPI_Status status;
1213     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     for (i=1; i<size; i++) {
1215       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1216       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1217       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1218       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1229 
1230   /* load up the local column values */
1231   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1232   cnt  = 0;
1233   for (i=0; i<mat->rmap->n; i++) {
1234     for (j=B->i[i]; j<B->i[i+1]; j++) {
1235       if (garray[B->j[j]] > cstart) break;
1236       column_values[cnt++] = B->a[j];
1237     }
1238     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1239     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1240   }
1241   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1242 
1243   /* store the column values to the file */
1244   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1245   if (!rank) {
1246     MPI_Status status;
1247     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1248     for (i=1; i<size; i++) {
1249       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1250       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1251       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1252       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     }
1255     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1256   } else {
1257     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1258     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(column_values);CHKERRQ(ierr);
1263 
1264   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1265   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include <petscdraw.h>
1270 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1271 {
1272   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode    ierr;
1274   PetscMPIInt       rank = aij->rank,size = aij->size;
1275   PetscBool         isdraw,iascii,isbinary;
1276   PetscViewer       sviewer;
1277   PetscViewerFormat format;
1278 
1279   PetscFunctionBegin;
1280   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1281   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1283   if (iascii) {
1284     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1285     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1286       MatInfo   info;
1287       PetscInt  *inodes;
1288 
1289       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1290       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1291       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1293       if (!inodes) {
1294         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1295                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1296       } else {
1297         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1298                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1299       }
1300       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1302       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1307       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1310       PetscInt inodecount,inodelimit,*inodes;
1311       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1312       if (inodes) {
1313         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1314       } else {
1315         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1316       }
1317       PetscFunctionReturn(0);
1318     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1319       PetscFunctionReturn(0);
1320     }
1321   } else if (isbinary) {
1322     if (size == 1) {
1323       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1324       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1325     } else {
1326       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1327     }
1328     PetscFunctionReturn(0);
1329   } else if (isdraw) {
1330     PetscDraw draw;
1331     PetscBool isnull;
1332     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1333     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1334     if (isnull) PetscFunctionReturn(0);
1335   }
1336 
1337   {
1338     /* assemble the entire matrix onto the first processor */
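         /* (rank 0 is given all M rows of the temporary matrix, so viewing its
            sequential diagonal block through a subviewer displays the whole matrix) */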
1339     Mat        A;
1340     Mat_SeqAIJ *Aloc;
1341     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1342     MatScalar  *a;
1343 
1344     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1345     if (!rank) {
1346       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1347     } else {
1348       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1349     }
1350     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1351     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1352     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1353     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1354     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1355 
1356     /* copy over the A part */
1357     Aloc = (Mat_SeqAIJ*)aij->A->data;
1358     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1359     row  = mat->rmap->rstart;
1360     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1361     for (i=0; i<m; i++) {
1362       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1363       row++;
1364       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1365     }
1366     aj = Aloc->j;
1367     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1368 
1369     /* copy over the B part */
1370     Aloc = (Mat_SeqAIJ*)aij->B->data;
1371     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1372     row  = mat->rmap->rstart;
1373     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1374     ct   = cols;
1375     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1376     for (i=0; i<m; i++) {
1377       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1378       row++;
1379       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1380     }
1381     ierr = PetscFree(ct);CHKERRQ(ierr);
1382     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384     /*
1385        Everyone has to participate in drawing the matrix since the graphics waits
1386        are synchronized across all processors that share the PetscDraw object
1387     */
1388     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1389     if (!rank) {
1390       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1391       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1394     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1395     ierr = MatDestroy(&A);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1401 {
1402   PetscErrorCode ierr;
1403   PetscBool      iascii,isdraw,issocket,isbinary;
1404 
1405   PetscFunctionBegin;
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1410   if (iascii || isdraw || isbinary || issocket) {
1411     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscErrorCode ierr;
1420   Vec            bb1 = 0;
1421   PetscBool      hasop;
1422 
1423   PetscFunctionBegin;
1424   if (flag == SOR_APPLY_UPPER) {
1425     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1426     PetscFunctionReturn(0);
1427   }
1428 
1429   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* (~flag & SOR_ZERO_INITIAL_GUESS) is true when the zero-initial-guess bit is NOT set */
1430     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1431   }
1432 
1433   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1436       its--;
1437     }
1438 
1439     while (its--) {
1440       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442 
1443       /* update rhs: bb1 = bb - B*x */
1444       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1445       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1446 
1447       /* local sweep */
1448       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1449     }
1450   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1451     if (flag & SOR_ZERO_INITIAL_GUESS) {
1452       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1453       its--;
1454     }
1455     while (its--) {
1456       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1461       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1462 
1463       /* local sweep */
1464       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1465     }
1466   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1469       its--;
1470     }
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_EISENSTAT) {
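         /* Eisenstat's trick: apply the SSOR preconditioner at roughly half the cost
            of the naive form by combining one local backward sweep, a diagonal
            scaling by (omega-2)/omega, and one local forward sweep (Eisenstat, 1981). */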
1483     Vec xx1;
1484 
1485     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1487 
1488     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490     if (!mat->diag) {
1491       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1492       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1493     }
1494     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1495     if (hasop) {
1496       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1497     } else {
1498       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1499     }
1500     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1501 
1502     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1503 
1504     /* local sweep */
1505     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1506     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1507     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1508   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1509 
1510   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1511 
1512   matin->factorerrortype = mat->A->factorerrortype;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1517 {
1518   Mat            aA,aB,Aperm;
1519   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1520   PetscScalar    *aa,*ba;
1521   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1522   PetscSF        rowsf,sf;
1523   IS             parcolp = NULL;
1524   PetscBool      done;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1529   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1530   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1531   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1532 
1533   /* Invert row permutation to find out where my rows should go */
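       /* rowp lists, for each row of the permuted matrix, the source row it comes
          from. Reducing work[i] = rstart+i over the star forest deposits each new
          row index at its source row, so rdest[k] ends up holding the new global
          index of local row k. */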
1534   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1535   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1536   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1537   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1538   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1540 
1541   /* Invert column permutation to find out where my columns should go */
1542   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1543   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1544   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1545   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1546   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1547   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1548   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1549 
1550   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1551   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1552   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1553 
1554   /* Find out where my gcols should go */
1555   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1556   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1557   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1558   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1559   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1560   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1561   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1562   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1563 
1564   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1565   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1566   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1567   for (i=0; i<m; i++) {
1568     PetscInt row = rdest[i],rowner;
1569     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1570     for (j=ai[i]; j<ai[i+1]; j++) {
1571       PetscInt cowner,col = cdest[aj[j]];
1572       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1573       if (rowner == cowner) dnnz[i]++;
1574       else onnz[i]++;
1575     }
1576     for (j=bi[i]; j<bi[i+1]; j++) {
1577       PetscInt cowner,col = gcdest[bj[j]];
1578       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1579       if (rowner == cowner) dnnz[i]++;
1580       else onnz[i]++;
1581     }
1582   }
1583   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1584   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1588 
1589   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1590   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1591   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1594     PetscInt j0,rowlen;
1595     rowlen = ai[i+1] - ai[i];
1596     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the row in batches of at most m */
1597       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1598       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1599     }
1600     rowlen = bi[i+1] - bi[i];
1601     for (j0=j=0; j<rowlen; j0=j) {
1602       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1603       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1604     }
1605   }
1606   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1609   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1610   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1611   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1612   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1614   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1615   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1616   *B = Aperm;
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscReal      isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1679 {
1680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692     MatCheckPreallocated(A,1);
1693     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1694     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1695     break;
1696   case MAT_ROW_ORIENTED:
1697     MatCheckPreallocated(A,1);
1698     a->roworiented = flg;
1699 
1700     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1701     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1702     break;
1703   case MAT_NEW_DIAGONALS:
1704     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1705     break;
1706   case MAT_IGNORE_OFF_PROC_ENTRIES:
1707     a->donotstash = flg;
1708     break;
1709   case MAT_SPD:
1710     A->spd_set = PETSC_TRUE;
1711     A->spd     = flg;
1712     if (flg) {
1713       A->symmetric                  = PETSC_TRUE;
1714       A->structurally_symmetric     = PETSC_TRUE;
1715       A->symmetric_set              = PETSC_TRUE;
1716       A->structurally_symmetric_set = PETSC_TRUE;
1717     }
1718     break;
1719   case MAT_SYMMETRIC:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_STRUCTURALLY_SYMMETRIC:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_HERMITIAN:
1728     MatCheckPreallocated(A,1);
1729     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_SYMMETRY_ETERNAL:
1732     MatCheckPreallocated(A,1);
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_SUBMAT_SINGLEIS:
1736     A->submat_singleis = flg;
1737     break;
1738   case MAT_STRUCTURE_ONLY:
1739     /* The option is handled directly by MatSetOption() */
1740     break;
1741   default:
1742     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1743   }
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1751   PetscErrorCode ierr;
1752   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1753   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1754   PetscInt       *cmap,*idx_p;
1755 
1756   PetscFunctionBegin;
1757   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1758   mat->getrowactive = PETSC_TRUE;
1759 
1760   if (!mat->rowvalues && (idx || v)) {
1761     /*
1762         allocate enough space to hold information from the longest row.
1763     */
1764     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1765     PetscInt   max = 1,tmp;
1766     for (i=0; i<matin->rmap->n; i++) {
1767       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1768       if (max < tmp) max = tmp;
1769     }
1770     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1771   }
1772 
1773   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1774   lrow = row - rstart;
1775 
1776   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1777   if (!v)   {pvA = 0; pvB = 0;}
1778   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1779   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1780   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1781   nztot = nzA + nzB;
1782 
1783   cmap = mat->garray;
1784   if (v  || idx) {
1785     if (nztot) {
1786       /* Sort by increasing column numbers, assuming A and B already sorted */
1787       PetscInt imark = -1;
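             /* imark counts the off-diagonal (B) entries whose global column precedes
                the diagonal block: B's row splits into [0,imark) placed before A's
                entries and [imark,nzB) placed after them. */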
1788       if (v) {
1789         *v = v_p = mat->rowvalues;
1790         for (i=0; i<nzB; i++) {
1791           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1792           else break;
1793         }
1794         imark = i;
1795         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1796         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1797       }
1798       if (idx) {
1799         *idx = idx_p = mat->rowindices;
1800         if (imark > -1) {
1801           for (i=0; i<imark; i++) {
1802             idx_p[i] = cmap[cworkB[i]];
1803           }
1804         } else {
1805           for (i=0; i<nzB; i++) {
1806             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1807             else break;
1808           }
1809           imark = i;
1810         }
1811         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1812         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1813       }
1814     } else {
1815       if (idx) *idx = 0;
1816       if (v)   *v   = 0;
1817     }
1818   }
1819   *nz  = nztot;
1820   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1821   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1826 {
1827   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1828 
1829   PetscFunctionBegin;
1830   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1831   aij->getrowactive = PETSC_FALSE;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1836 {
1837   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1838   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1839   PetscErrorCode ierr;
1840   PetscInt       i,j,cstart = mat->cmap->rstart;
1841   PetscReal      sum = 0.0;
1842   MatScalar      *v;
1843 
1844   PetscFunctionBegin;
1845   if (aij->size == 1) {
1846     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1847   } else {
1848     if (type == NORM_FROBENIUS) {
1849       v = amat->a;
1850       for (i=0; i<amat->nz; i++) {
1851         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1852       }
1853       v = bmat->a;
1854       for (i=0; i<bmat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1858       *norm = PetscSqrtReal(*norm);
1859       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1860     } else if (type == NORM_1) { /* max column norm */
1861       PetscReal *tmp,*tmp2;
1862       PetscInt  *jj,*garray = aij->garray;
1863       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1864       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1865       *norm = 0.0;
1866       v     = amat->a; jj = amat->j;
1867       for (j=0; j<amat->nz; j++) {
1868         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1869       }
1870       v = bmat->a; jj = bmat->j;
1871       for (j=0; j<bmat->nz; j++) {
1872         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1873       }
1874       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1875       for (j=0; j<mat->cmap->N; j++) {
1876         if (tmp2[j] > *norm) *norm = tmp2[j];
1877       }
1878       ierr = PetscFree(tmp);CHKERRQ(ierr);
1879       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1880       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1881     } else if (type == NORM_INFINITY) { /* max row norm */
1882       PetscReal ntemp = 0.0;
1883       for (j=0; j<aij->A->rmap->n; j++) {
1884         v   = amat->a + amat->i[j];
1885         sum = 0.0;
1886         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1887           sum += PetscAbsScalar(*v); v++;
1888         }
1889         v = bmat->a + bmat->i[j];
1890         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v); v++;
1892         }
1893         if (sum > ntemp) ntemp = sum;
1894       }
1895       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1896       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1897     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1903 {
1904   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1905   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1906   PetscErrorCode ierr;
1907   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1908   PetscInt       cstart = A->cmap->rstart,ncol;
1909   Mat            B;
1910   MatScalar      *array;
1911 
1912   PetscFunctionBegin;
1913   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1914 
1915   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1916   ai = Aloc->i; aj = Aloc->j;
1917   bi = Bloc->i; bj = Bloc->j;
1918   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1919     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1920     PetscSFNode          *oloc;
1921     PETSC_UNUSED PetscSF sf;
1922 
1923     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1924     /* compute d_nnz for preallocation */
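         /* each nonzero in local column aj[i] of A's diagonal block becomes a nonzero
            in local row aj[i] of the transpose, so the column counts of the diagonal
            block give its d_nnz */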
1925     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1926     for (i=0; i<ai[ma]; i++) {
1927       d_nnz[aj[i]]++;
1928       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1929     }
1930     /* compute local off-diagonal contributions */
1931     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1932     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1933     /* map those to global */
1934     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1935     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1936     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1937     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1938     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1939     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1940     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1941 
1942     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1943     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1944     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1945     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1946     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1947     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1948   } else {
1949     B    = *matout;
1950     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1951     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1952   }
1953 
1954   /* copy over the A part */
1955   array = Aloc->a;
1956   row   = A->rmap->rstart;
1957   for (i=0; i<ma; i++) {
1958     ncol = ai[i+1]-ai[i];
1959     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1960     row++;
1961     array += ncol; aj += ncol;
1962   }
1963   aj = Aloc->j;
1964   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1965 
1966   /* copy over the B part */
1967   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1968   array = Bloc->a;
1969   row   = A->rmap->rstart;
1970   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1971   cols_tmp = cols;
1972   for (i=0; i<mb; i++) {
1973     ncol = bi[i+1]-bi[i];
1974     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1975     row++;
1976     array += ncol; cols_tmp += ncol;
1977   }
1978   ierr = PetscFree(cols);CHKERRQ(ierr);
1979 
1980   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1981   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1983     *matout = B;
1984   } else {
1985     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1986   }
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1991 {
1992   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1993   Mat            a    = aij->A,b = aij->B;
1994   PetscErrorCode ierr;
1995   PetscInt       s1,s2,s3;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1999   if (rr) {
2000     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2001     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2002     /* Overlap communication with computation. */
2003     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2004   }
2005   if (ll) {
2006     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2007     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2008     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2009   }
2010   /* scale the diagonal block */
2011   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2012 
2013   if (rr) {
2014     /* Do a scatter end and then right scale the off-diagonal block */
2015     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2016     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2017   }
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2022 {
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   PetscErrorCode ierr;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2032 {
2033   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2034   Mat            a,b,c,d;
2035   PetscBool      flg;
2036   PetscErrorCode ierr;
2037 
2038   PetscFunctionBegin;
2039   a = matA->A; b = matA->B;
2040   c = matB->A; d = matB->B;
2041 
2042   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2043   if (flg) {
2044     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2045   }
2046   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2051 {
2052   PetscErrorCode ierr;
2053   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2054   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2055 
2056   PetscFunctionBegin;
2057   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2058   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2059     /* because of the column compression in the off-processor part of the matrix a->B,
2060        the number of columns in a->B and b->B may be different, hence we cannot call
2061       MatCopy() directly on the two parts. If need be, we can provide a more
2062        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2063        then copying the submatrices */
2064     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2065   } else {
2066     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2067     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2068   }
2069   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2074 {
2075   PetscErrorCode ierr;
2076 
2077   PetscFunctionBegin;
2078   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 /*
2083    Computes the number of nonzeros per row needed for preallocation when X and Y
2084    have different nonzero structure.
2085 */
2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2087 {
2088   PetscInt       i,j,k,nzx,nzy;
2089 
2090   PetscFunctionBegin;
2091   /* Set the number of nonzeros in the new matrix */
2092   for (i=0; i<m; i++) {
2093     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2094     nzx = xi[i+1] - xi[i];
2095     nzy = yi[i+1] - yi[i];
2096     nnz[i] = 0;
2097     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2098       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2099       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2100       nnz[i]++;
2101     }
2102     for (; k<nzy; k++) nnz[i]++;
2103   }
2104   PetscFunctionReturn(0);
2105 }
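
   /* A worked check of the merge count above (an illustration, not code used by this
      file): if row i of X has global columns {1,4,7} and row i of Y has {2,4,9},
      the merged pattern is {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is
      counted once. */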
2106 
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2109 {
2110   PetscErrorCode ierr;
2111   PetscInt       m = Y->rmap->N;
2112   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2113   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2114 
2115   PetscFunctionBegin;
2116   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2124   PetscBLASInt   bnz,one=1;
2125   Mat_SeqAIJ     *x,*y;
2126 
2127   PetscFunctionBegin;
2128   if (str == SAME_NONZERO_PATTERN) {
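         /* with identical nonzero patterns, Y += a*X reduces to a BLAS axpy on the raw
            value arrays of the diagonal and off-diagonal blocks */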
2129     PetscScalar alpha = a;
2130     x    = (Mat_SeqAIJ*)xx->A->data;
2131     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2132     y    = (Mat_SeqAIJ*)yy->A->data;
2133     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2134     x    = (Mat_SeqAIJ*)xx->B->data;
2135     y    = (Mat_SeqAIJ*)yy->B->data;
2136     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2137     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2138     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2139   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
2140     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2141   } else {
2142     Mat      B;
2143     PetscInt *nnz_d,*nnz_o;
2144     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2145     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2146     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2147     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2148     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2149     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2150     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2151     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2152     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2153     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2154     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2155     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2156     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2157     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2158   }
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2163 
2164 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2165 {
2166 #if defined(PETSC_USE_COMPLEX)
2167   PetscErrorCode ierr;
2168   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2172   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2173 #else
2174   PetscFunctionBegin;
2175 #endif
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2180 {
2181   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2186   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2191 {
2192   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2197   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205   PetscInt       i,*idxb = 0;
2206   PetscScalar    *va,*vb;
2207   Vec            vtmp;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2211   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2212   if (idx) {
2213     for (i=0; i<A->rmap->n; i++) {
2214       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2215     }
2216   }
2217 
2218   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2219   if (idx) {
2220     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2221   }
2222   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2223   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2224 
2225   for (i=0; i<A->rmap->n; i++) {
2226     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2227       va[i] = vb[i];
2228       if (idx) idx[i] = a->garray[idxb[i]];
2229     }
2230   }
2231 
2232   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2233   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2234   ierr = PetscFree(idxb);CHKERRQ(ierr);
2235   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243   PetscInt       i,*idxb = 0;
2244   PetscScalar    *va,*vb;
2245   Vec            vtmp;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2249   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2250   if (idx) {
2251     for (i=0; i<A->rmap->n; i++) {
2252       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2253     }
2254   }
2255 
2256   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2257   if (idx) {
2258     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2259   }
2260   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2261   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2262 
2263   for (i=0; i<A->rmap->n; i++) {
2264     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2265       va[i] = vb[i];
2266       if (idx) idx[i] = a->garray[idxb[i]];
2267     }
2268   }
2269 
2270   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2271   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2272   ierr = PetscFree(idxb);CHKERRQ(ierr);
2273   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2278 {
2279   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2280   PetscInt       n      = A->rmap->n;
2281   PetscInt       cstart = A->cmap->rstart;
2282   PetscInt       *cmap  = mat->garray;
2283   PetscInt       *diagIdx, *offdiagIdx;
2284   Vec            diagV, offdiagV;
2285   PetscScalar    *a, *diagA, *offdiagA;
2286   PetscInt       r;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
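       /* Compute row minima of the diagonal and off-diagonal blocks separately, then
          take the entrywise winner, translating local column indices to global ones:
          a cstart offset for the diagonal block, garray[] for the off-diagonal block. */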
2290   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2291   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2292   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2293   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2294   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2295   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2296   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2297   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2298   for (r = 0; r < n; ++r) {
2299     if (PetscRealPart(diagA[r]) <= PetscRealPart(offdiagA[r])) { /* the row minimum compares values, not magnitudes */
2300       a[r]   = diagA[r];
2301       idx[r] = cstart + diagIdx[r];
2302     } else {
2303       a[r]   = offdiagA[r];
2304       idx[r] = cmap[offdiagIdx[r]];
2305     }
2306   }
2307   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2308   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2309   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2310   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2311   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2312   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2317 {
2318   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2319   PetscInt       n      = A->rmap->n;
2320   PetscInt       cstart = A->cmap->rstart;
2321   PetscInt       *cmap  = mat->garray;
2322   PetscInt       *diagIdx, *offdiagIdx;
2323   Vec            diagV, offdiagV;
2324   PetscScalar    *a, *diagA, *offdiagA;
2325   PetscInt       r;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2330   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2331   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2332   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2333   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2334   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2335   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2336   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2337   for (r = 0; r < n; ++r) {
2338     if (PetscRealPart(diagA[r]) >= PetscRealPart(offdiagA[r])) { /* the row maximum compares values, not magnitudes */
2339       a[r]   = diagA[r];
2340       idx[r] = cstart + diagIdx[r];
2341     } else {
2342       a[r]   = offdiagA[r];
2343       idx[r] = cmap[offdiagIdx[r]];
2344     }
2345   }
2346   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2348   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2349   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2350   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2351   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2356 {
2357   PetscErrorCode ierr;
2358   Mat            *dummy;
2359 
2360   PetscFunctionBegin;
2361   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2362   *newmat = *dummy;
2363   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2368 {
2369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2374   A->factorerrortype = a->A->factorerrortype;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2379 {
2380   PetscErrorCode ierr;
2381   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2382 
2383   PetscFunctionBegin;
2384   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2385   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2392 {
2393   PetscFunctionBegin;
2394   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2395   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 /*@
2400    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2401 
2402    Collective on Mat
2403 
2404    Input Parameters:
2405 +    A - the matrix
2406 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
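
     Options Database Key:
.    -mat_increase_overlap_scalable <bool> - use a scalable algorithm to compute the overlap (read during MatSetFromOptions())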
2407 
2408    Level: advanced
2409 
2410 @*/
2411 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2412 {
2413   PetscErrorCode       ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2421 {
2422   PetscErrorCode       ierr;
2423   PetscBool            sc = PETSC_FALSE,flg;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2427   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2428   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2429   if (flg) {
2430     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2431   }
2432   ierr = PetscOptionsTail();CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2438 {
2439   PetscErrorCode ierr;
2440   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2441   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2442 
2443   PetscFunctionBegin;
2444   if (!Y->preallocated) {
2445     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2446   } else if (!aij->nz) {
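         /* save and restore the nonew flag so that re-preallocating the empty
            diagonal block does not clobber the caller's new-nonzero policy */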
2447     PetscInt nonew = aij->nonew;
2448     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2449     aij->nonew = nonew;
2450   }
2451   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2456 {
2457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2462   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2463   if (d) {
2464     PetscInt rstart;
2465     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2466     *d += rstart;
2467   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 
2473 /* -------------------------------------------------------------------*/
2474 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2475                                        MatGetRow_MPIAIJ,
2476                                        MatRestoreRow_MPIAIJ,
2477                                        MatMult_MPIAIJ,
2478                                 /* 4*/ MatMultAdd_MPIAIJ,
2479                                        MatMultTranspose_MPIAIJ,
2480                                        MatMultTransposeAdd_MPIAIJ,
2481                                        0,
2482                                        0,
2483                                        0,
2484                                 /*10*/ 0,
2485                                        0,
2486                                        0,
2487                                        MatSOR_MPIAIJ,
2488                                        MatTranspose_MPIAIJ,
2489                                 /*15*/ MatGetInfo_MPIAIJ,
2490                                        MatEqual_MPIAIJ,
2491                                        MatGetDiagonal_MPIAIJ,
2492                                        MatDiagonalScale_MPIAIJ,
2493                                        MatNorm_MPIAIJ,
2494                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2495                                        MatAssemblyEnd_MPIAIJ,
2496                                        MatSetOption_MPIAIJ,
2497                                        MatZeroEntries_MPIAIJ,
2498                                 /*24*/ MatZeroRows_MPIAIJ,
2499                                        0,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                 /*29*/ MatSetUp_MPIAIJ,
2504                                        0,
2505                                        0,
2506                                        MatGetDiagonalBlock_MPIAIJ,
2507                                        0,
2508                                 /*34*/ MatDuplicate_MPIAIJ,
2509                                        0,
2510                                        0,
2511                                        0,
2512                                        0,
2513                                 /*39*/ MatAXPY_MPIAIJ,
2514                                        MatCreateSubMatrices_MPIAIJ,
2515                                        MatIncreaseOverlap_MPIAIJ,
2516                                        MatGetValues_MPIAIJ,
2517                                        MatCopy_MPIAIJ,
2518                                 /*44*/ MatGetRowMax_MPIAIJ,
2519                                        MatScale_MPIAIJ,
2520                                        MatShift_MPIAIJ,
2521                                        MatDiagonalSet_MPIAIJ,
2522                                        MatZeroRowsColumns_MPIAIJ,
2523                                 /*49*/ MatSetRandom_MPIAIJ,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2529                                        0,
2530                                        MatSetUnfactored_MPIAIJ,
2531                                        MatPermute_MPIAIJ,
2532                                        0,
2533                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2534                                        MatDestroy_MPIAIJ,
2535                                        MatView_MPIAIJ,
2536                                        0,
2537                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2538                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2539                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2544                                        MatGetRowMinAbs_MPIAIJ,
2545                                        0,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*75*/ MatFDColoringApply_AIJ,
2550                                        MatSetFromOptions_MPIAIJ,
2551                                        0,
2552                                        0,
2553                                        MatFindZeroDiagonals_MPIAIJ,
2554                                 /*80*/ 0,
2555                                        0,
2556                                        0,
2557                                 /*83*/ MatLoad_MPIAIJ,
2558                                        MatIsSymmetric_MPIAIJ,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2564                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2565                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2566                                        MatPtAP_MPIAIJ_MPIAIJ,
2567                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2568                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*99*/ 0,
2574                                        0,
2575                                        0,
2576                                        MatConjugate_MPIAIJ,
2577                                        0,
2578                                 /*104*/MatSetValuesRow_MPIAIJ,
2579                                        MatRealPart_MPIAIJ,
2580                                        MatImaginaryPart_MPIAIJ,
2581                                        0,
2582                                        0,
2583                                 /*109*/0,
2584                                        0,
2585                                        MatGetRowMin_MPIAIJ,
2586                                        0,
2587                                        MatMissingDiagonal_MPIAIJ,
2588                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2589                                        0,
2590                                        MatGetGhosts_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                 /*119*/0,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        MatGetMultiProcBlock_MPIAIJ,
2598                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2599                                        MatGetColumnNorms_MPIAIJ,
2600                                        MatInvertBlockDiagonal_MPIAIJ,
2601                                        0,
2602                                        MatCreateSubMatricesMPI_MPIAIJ,
2603                                 /*129*/0,
2604                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2605                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2606                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2607                                        0,
2608                                 /*134*/0,
2609                                        0,
2610                                        MatRARt_MPIAIJ_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                 /*139*/MatSetBlockSizes_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        MatFDColoringSetUp_MPIXAIJ,
2617                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2618                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2619 };
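
/*
   The table above fills the _MatOps function-pointer slots that the generic Mat
   interface consults; a 0 entry means the operation is not implemented for MATMPIAIJ.
   As a rough, illustrative sketch (not code from this file), the generic layer
   dispatches like this, so e.g. MatSOR() on a MATMPIAIJ matrix reaches MatSOR_MPIAIJ:

     if (!mat->ops->sor) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Mat type %s",((PetscObject)mat)->type_name);
     ierr = (*mat->ops->sor)(mat,b,omega,flag,shift,its,lits,x);CHKERRQ(ierr);
*/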
2620 
2621 /* ----------------------------------------------------------------------------------------*/
2622 
2623 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2624 {
2625   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2626   PetscErrorCode ierr;
2627 
2628   PetscFunctionBegin;
2629   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2630   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2635 {
2636   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2637   PetscErrorCode ierr;
2638 
2639   PetscFunctionBegin;
2640   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2641   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
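
/*
   MatStoreValues_MPIAIJ() and MatRetrieveValues_MPIAIJ() simply forward to the
   diagonal (A) and off-diagonal (B) sequential blocks. A minimal usage sketch of the
   public entry points (illustrative only; assumes an assembled matrix 'mat' whose
   nonzero pattern does not change between calls):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);    // stash the current numerical values
     ierr = MatZeroEntries(mat);CHKERRQ(ierr);    // ... refill and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr); // restore the stashed values
*/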
2644 
2645 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2646 {
2647   Mat_MPIAIJ     *b;
2648   PetscErrorCode ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2652   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2653   b = (Mat_MPIAIJ*)B->data;
2654 
2655 #if defined(PETSC_USE_CTABLE)
2656   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2657 #else
2658   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2659 #endif
2660   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2661   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2662   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2663 
2664   /* Because B will have been resized we simply destroy it and create a new one each time */
2665   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2666   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2667   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2668   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2669   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2670   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2671 
2672   if (!B->preallocated) {
2673     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2674     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2675     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2676     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2677     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2678   }
2679 
2680   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2681   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2682   B->preallocated  = PETSC_TRUE;
2683   B->was_assembled = PETSC_FALSE;
2684   B->assembled     = PETSC_FALSE;
2685   PetscFunctionReturn(0);
2686 }
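
/*
   A minimal caller-side sketch of the preallocation path above (illustrative only;
   'A', 'm', and the per-row bounds are hypothetical). Here 3 and 2 bound the nonzeros
   per row in the diagonal and off-diagonal blocks; per-row arrays d_nnz/o_nnz may be
   passed instead of the scalar bounds:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
*/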
2687 
2688 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2689 {
2690   Mat            mat;
2691   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2692   PetscErrorCode ierr;
2693 
2694   PetscFunctionBegin;
2695   *newmat = 0;
2696   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2697   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2698   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2699   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2700   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2701   a       = (Mat_MPIAIJ*)mat->data;
2702 
2703   mat->factortype   = matin->factortype;
2704   mat->assembled    = PETSC_TRUE;
2705   mat->insertmode   = NOT_SET_VALUES;
2706   mat->preallocated = PETSC_TRUE;
2707 
2708   a->size         = oldmat->size;
2709   a->rank         = oldmat->rank;
2710   a->donotstash   = oldmat->donotstash;
2711   a->roworiented  = oldmat->roworiented;
2712   a->rowindices   = 0;
2713   a->rowvalues    = 0;
2714   a->getrowactive = PETSC_FALSE;
2715 
2716   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2717   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2718 
2719   if (oldmat->colmap) {
2720 #if defined(PETSC_USE_CTABLE)
2721     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2722 #else
2723     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2724     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2725     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2726 #endif
2727   } else a->colmap = 0;
2728   if (oldmat->garray) {
2729     PetscInt len;
2730     len  = oldmat->B->cmap->n;
2731     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2732     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2733     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2734   } else a->garray = 0;
2735 
2736   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2737   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2738   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2739   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2740   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2741   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2742   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2743   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2744   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2745   *newmat = mat;
2746   PetscFunctionReturn(0);
2747 }
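
/*
   Usage sketch for the duplicate path above (illustrative only): copy the structure
   and values of an assembled matrix, then modify the copy without touching the original:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatScale(B,2.0);CHKERRQ(ierr); // A is unchanged
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/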
2748 
2749 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2750 {
2751   PetscScalar    *vals,*svals;
2752   MPI_Comm       comm;
2753   PetscErrorCode ierr;
2754   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2755   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2756   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2757   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2758   PetscInt       cend,cstart,n,*rowners;
2759   int            fd;
2760   PetscInt       bs = newMat->rmap->bs;
2761 
2762   PetscFunctionBegin;
2763   /* force binary viewer to load .info file if it has not yet done so */
2764   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2765   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2766   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2767   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2768   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2769   if (!rank) {
2770     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2771     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2772     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2773   }
2774 
2775   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2776   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2777   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2778   if (bs < 0) bs = 1;
2779 
2780   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2781   M    = header[1]; N = header[2];
2782 
2783   /* If global sizes are set, check if they are consistent with that given in the file */
2784   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2785   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2786 
2787   /* determine ownership of all (block) rows */
2788   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2789   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2790   else m = newMat->rmap->n; /* Set by user */
2791 
2792   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2793   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2794 
2795   /* First process needs enough room for process with most rows */
2796   if (!rank) {
2797     mmax = rowners[1];
2798     for (i=2; i<=size; i++) {
2799       mmax = PetscMax(mmax, rowners[i]);
2800     }
2801   } else mmax = -1;             /* unused, but compilers complain */
2802 
2803   rowners[0] = 0;
2804   for (i=2; i<=size; i++) {
2805     rowners[i] += rowners[i-1];
2806   }
2807   rstart = rowners[rank];
2808   rend   = rowners[rank+1];
2809 
2810   /* distribute row lengths to all processors */
2811   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2812   if (!rank) {
2813     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2814     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2815     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2816     for (j=0; j<m; j++) {
2817       procsnz[0] += ourlens[j];
2818     }
2819     for (i=1; i<size; i++) {
2820       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2821       /* calculate the number of nonzeros on each processor */
2822       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2823         procsnz[i] += rowlengths[j];
2824       }
2825       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2826     }
2827     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2828   } else {
2829     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2830   }
2831 
2832   if (!rank) {
2833     /* determine max buffer needed and allocate it */
2834     maxnz = 0;
2835     for (i=0; i<size; i++) {
2836       maxnz = PetscMax(maxnz,procsnz[i]);
2837     }
2838     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2839 
2840     /* read in my part of the matrix column indices  */
2841     nz   = procsnz[0];
2842     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2843     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2844 
2845     /* read in everyone else's and ship them off */
2846     for (i=1; i<size; i++) {
2847       nz   = procsnz[i];
2848       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2849       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2850     }
2851     ierr = PetscFree(cols);CHKERRQ(ierr);
2852   } else {
2853     /* determine buffer space needed for message */
2854     nz = 0;
2855     for (i=0; i<m; i++) {
2856       nz += ourlens[i];
2857     }
2858     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2859 
2860     /* receive message of column indices */
2861     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2862   }
2863 
2864   /* determine column ownership if matrix is not square */
2865   if (N != M) {
2866     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2867     else n = newMat->cmap->n;
2868     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2869     cstart = cend - n;
2870   } else {
2871     cstart = rstart;
2872     cend   = rend;
2873     n      = cend - cstart;
2874   }
2875 
2876   /* loop over local rows, determining the number of off-diagonal entries */
2877   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2878   jj   = 0;
2879   for (i=0; i<m; i++) {
2880     for (j=0; j<ourlens[i]; j++) {
2881       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2882       jj++;
2883     }
2884   }
2885 
2886   for (i=0; i<m; i++) {
2887     ourlens[i] -= offlens[i];
2888   }
2889   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2890 
2891   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2892 
2893   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2894 
2895   for (i=0; i<m; i++) {
2896     ourlens[i] += offlens[i];
2897   }
2898 
2899   if (!rank) {
2900     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2901 
2902     /* read in my part of the matrix numerical values  */
2903     nz   = procsnz[0];
2904     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2905 
2906     /* insert into matrix */
2907     jj      = rstart;
2908     smycols = mycols;
2909     svals   = vals;
2910     for (i=0; i<m; i++) {
2911       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2912       smycols += ourlens[i];
2913       svals   += ourlens[i];
2914       jj++;
2915     }
2916 
2917     /* read in other processors and ship out */
2918     for (i=1; i<size; i++) {
2919       nz   = procsnz[i];
2920       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2921       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2922     }
2923     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2924   } else {
2925     /* receive numeric values */
2926     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2927 
2928     /* receive message of values */
2929     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2930 
2931     /* insert into matrix */
2932     jj      = rstart;
2933     smycols = mycols;
2934     svals   = vals;
2935     for (i=0; i<m; i++) {
2936       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2937       smycols += ourlens[i];
2938       svals   += ourlens[i];
2939       jj++;
2940     }
2941   }
2942   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2943   ierr = PetscFree(vals);CHKERRQ(ierr);
2944   ierr = PetscFree(mycols);CHKERRQ(ierr);
2945   ierr = PetscFree(rowners);CHKERRQ(ierr);
2946   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2947   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
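
/*
   MatLoad_MPIAIJ() is normally reached through MatLoad() on a binary viewer. A minimal
   usage sketch (illustrative only; "matrix.dat" is a hypothetical file previously
   written with MatView() on a binary viewer):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/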
2950 
2951 /* Not scalable because of ISAllGather() unless getting all columns. */
2952 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2953 {
2954   PetscErrorCode ierr;
2955   IS             iscol_local;
2956   PetscBool      isstride;
2957   PetscMPIInt    lisstride=0,gisstride;
2958 
2959   PetscFunctionBegin;
2960   /* check if we are grabbing all columns */
2961   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2962 
2963   if (isstride) {
2964     PetscInt  start,len,mstart,mlen;
2965     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2966     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2967     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2968     if (mstart == start && mlen-mstart == len) lisstride = 1;
2969   }
2970 
2971   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2972   if (gisstride) {
2973     PetscInt N;
2974     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2975     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2976     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2977     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2978   } else {
2979     PetscInt cbs;
2980     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2981     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2982     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2983   }
2984 
2985   *isseq = iscol_local;
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /*
2990  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local
2991  of global size (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2992 
2993  Input Parameters:
2994    mat - matrix
2995    isrow - parallel row index set; its local indices are a subset of local rows of mat,
2996            i.e., mat->rstart <= isrow[i] < mat->rend
2997    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2998            i.e., mat->cstart <= iscol[i] < mat->cend
2999  Output Parameters:
3000    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3001    iscol_o - sequential column index set for retrieving mat->B
3002    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3003  */
3004 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3005 {
3006   PetscErrorCode ierr;
3007   Vec            x,cmap;
3008   const PetscInt *is_idx;
3009   PetscScalar    *xarray,*cmaparray;
3010   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3011   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3012   Mat            B=a->B;
3013   Vec            lvec=a->lvec,lcmap;
3014   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3015   MPI_Comm       comm;
3016 
3017   PetscFunctionBegin;
3018   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3019   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3020 
3021   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3022   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3023   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3024   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3025 
3026   /* Get start indices */
3027   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3028   isstart -= ncols;
3029   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3030 
3031   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3032   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3033   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3034   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3035   for (i=0; i<ncols; i++) {
3036     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3037     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3038     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3039   }
3040   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3041   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3042   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3043 
3044   /* Get iscol_d */
3045   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3046   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3047   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3048 
3049   /* Get isrow_d */
3050   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3051   rstart = mat->rmap->rstart;
3052   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3053   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3054   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3055   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3056 
3057   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3058   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3059   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3060   /* (2) Scatter x and cmap using a->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3061   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3062   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3063 
3064   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3065 
3066   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3067   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3068   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3069 
3070   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3071   /* off-process column indices */
3072   count = 0;
3073   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3074   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3075 
3076   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3077   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3078   for (i=0; i<Bn; i++) {
3079     if (PetscRealPart(xarray[i]) > -1.0) {
3080       idx[count]     = i;                   /* local column index in off-diagonal part B */
3081       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3082     }
3083   }
3084   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3085   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3086 
3087   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3088   /* cannot ensure iscol_o has same blocksize as iscol! */
3089 
3090   ierr = PetscFree(idx);CHKERRQ(ierr);
3091 
3092   *garray = cmap1;
3093 
3094   ierr = VecDestroy(&x);CHKERRQ(ierr);
3095   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3096   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3101 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3102 {
3103   PetscErrorCode ierr;
3104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3105   Mat            M = NULL;
3106   MPI_Comm       comm;
3107   IS             iscol_d,isrow_d,iscol_o;
3108   Mat            Asub = NULL,Bsub = NULL;
3109   PetscInt       n;
3110 
3111   PetscFunctionBegin;
3112   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3113 
3114   if (call == MAT_REUSE_MATRIX) {
3115     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3116     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3117     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3118 
3119     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3120     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3121 
3122     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3123     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3124 
3125     /* Update diagonal and off-diagonal portions of submat */
3126     asub = (Mat_MPIAIJ*)(*submat)->data;
3127     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3128     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3129     if (n) {
3130       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3131     }
3132     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134 
3135   } else { /* call == MAT_INITIAL_MATRIX */
3136     const PetscInt *garray;
3137     PetscInt        BsubN;
3138 
3139     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3140     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3141 
3142     /* Create local submatrices Asub and Bsub */
3143     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3144     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3145 
3146     /* Create submatrix M */
3147     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3148 
3149     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3150     asub = (Mat_MPIAIJ*)M->data;
3151 
3152     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3153     n = asub->B->cmap->N;
3154     if (BsubN > n) {
3155       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3156       const PetscInt *idx;
3157       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3158       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3159 
3160       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3161       j = 0;
3162       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3163       for (i=0; i<n; i++) {
3164         if (j >= BsubN) break;
3165         while (subgarray[i] > garray[j]) j++;
3166 
3167         if (subgarray[i] == garray[j]) {
3168           idx_new[i] = idx[j++];
3169         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3170       }
3171       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3172 
3173       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3174       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3175 
3176     } else if (BsubN < n) {
3177       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3178     }
3179 
3180     ierr = PetscFree(garray);CHKERRQ(ierr);
3181     *submat = M;
3182 
3183     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3184     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3185     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3186 
3187     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3188     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3189 
3190     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3191     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3192   }
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3197 {
3198   PetscErrorCode ierr;
3199   IS             iscol_local,isrow_d;
3200   PetscInt       csize;
3201   PetscInt       n,i,j,start,end;
3202   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3203   MPI_Comm       comm;
3204 
3205   PetscFunctionBegin;
3206   /* If isrow has same processor distribution as mat,
3207      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3208   if (call == MAT_REUSE_MATRIX) {
3209     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3210     if (isrow_d) {
3211       sameRowDist  = PETSC_TRUE;
3212       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3213     } else {
3214       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3215       if (iscol_local) {
3216         sameRowDist  = PETSC_TRUE;
3217         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3218       }
3219     }
3220   } else {
3221     /* Check if isrow has same processor distribution as mat */
3222     sameDist[0] = PETSC_FALSE;
3223     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3224     if (!n) {
3225       sameDist[0] = PETSC_TRUE;
3226     } else {
3227       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3228       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3229       if (i >= start && j < end) {
3230         sameDist[0] = PETSC_TRUE;
3231       }
3232     }
3233 
3234     /* Check if iscol has same processor distribution as mat */
3235     sameDist[1] = PETSC_FALSE;
3236     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3237     if (!n) {
3238       sameDist[1] = PETSC_TRUE;
3239     } else {
3240       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3241       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3242       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3243     }
3244 
3245     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3246     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3247     sameRowDist = tsameDist[0];
3248   }
3249 
3250   if (sameRowDist) {
3251     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3252       /* isrow and iscol have same processor distribution as mat */
3253       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3254     } else { /* sameRowDist */
3255       /* isrow has same processor distribution as mat */
3256       ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3257     }
3258     PetscFunctionReturn(0);
3259   }
3260 
3261   /* General case: iscol -> iscol_local which has global size of iscol */
3262   if (call == MAT_REUSE_MATRIX) {
3263     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3264     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3265   } else {
3266     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3267   }
3268 
3269   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3270   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3271 
3272   if (call == MAT_INITIAL_MATRIX) {
3273     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3274     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3275   }
3276   PetscFunctionReturn(0);
3277 }
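
/*
   Caller-side sketch of the dispatch above (illustrative only; 'isrow' and 'iscol' are
   hypothetical index sets). The first extraction must use MAT_INITIAL_MATRIX; later
   extractions with the same layout can pass MAT_REUSE_MATRIX to reuse the objects
   composed on the submatrix:

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     // ... change the values of A, keeping the same nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/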
3278 
3279 /*@C
3280      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3281          and "off-diagonal" part of the matrix in CSR format.
3282 
3283    Collective on MPI_Comm
3284 
3285    Input Parameters:
3286 +  comm - MPI communicator
3287 .  A - "diagonal" portion of matrix
3288 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3289 -  garray - global index of B columns
3290 
3291    Output Parameter:
3292 .   mat - the matrix, with input A as its local diagonal matrix
3293    Level: advanced
3294 
3295    Notes:
3296        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3297        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3298        A becomes part of the output mat and B is destroyed by this routine; the user may not use A or B afterwards.
3299 .seealso: MatCreateMPIAIJWithSplitArrays()
3300 @*/
3301 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3302 {
3303   PetscErrorCode ierr;
3304   Mat_MPIAIJ     *maij;
3305   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3306   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3307   PetscScalar    *oa=b->a;
3308   Mat            Bnew;
3309   PetscInt       m,n,N;
3310 
3311   PetscFunctionBegin;
3312   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3313   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3314   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3315   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3316   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3317   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3318 
3319   /* Get global columns of mat */
3320   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3321 
3322   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3323   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3324   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3325   maij = (Mat_MPIAIJ*)(*mat)->data;
3326 
3327   (*mat)->preallocated = PETSC_TRUE;
3328 
3329   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3330   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3331 
3332   /* Set A as diagonal portion of *mat */
3333   maij->A = A;
3334 
3335   nz = oi[m];
3336   for (i=0; i<nz; i++) {
3337     col   = oj[i];
3338     oj[i] = garray[col];
3339   }
3340 
3341   /* Set Bnew as off-diagonal portion of *mat */
3342   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3343   bnew        = (Mat_SeqAIJ*)Bnew->data;
3344   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3345   maij->B     = Bnew;
3346 
3347   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3348 
3349   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3350   b->free_a       = PETSC_FALSE;
3351   b->free_ij      = PETSC_FALSE;
3352   ierr = MatDestroy(&B);CHKERRQ(ierr);
3353 
3354   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3355   bnew->free_a       = PETSC_TRUE;
3356   bnew->free_ij      = PETSC_TRUE;
3357 
3358   /* condense columns of maij->B */
3359   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3360   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3361   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3362   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3363   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
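
/*
   Ownership sketch for the routine above (illustrative only), following the Notes in
   its man page: after the call the caller owns neither A nor B;

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&C);CHKERRQ(ierr);
     A = NULL; B = NULL; // A is absorbed into C, B has been destroyed
*/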
3366 
3367 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3368 
3369 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3370 {
3371   PetscErrorCode ierr;
3372   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3373   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3374   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3375   Mat            M,Msub,B=a->B;
3376   MatScalar      *aa;
3377   Mat_SeqAIJ     *aij;
3378   PetscInt       *garray = a->garray,*colsub,Ncols;
3379   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3380   IS             iscol_sub,iscmap;
3381   const PetscInt *is_idx,*cmap;
3382   PetscBool      allcolumns=PETSC_FALSE;
3383   IS             iscol_local=NULL;
3384   MPI_Comm       comm;
3385 
3386   PetscFunctionBegin;
3387   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3388 
3389   if (call == MAT_REUSE_MATRIX) {
3390     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3391     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3392     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3393 
3394     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3395     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3396 
3397     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3398     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3399 
3400     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3401 
3402   } else { /* call == MAT_INITIAL_MATRIX */
3403     PetscBool flg;
3404 
3405     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3406     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3407 
3408     /* (1) iscol -> nonscalable iscol_local */
3409     ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3410     ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3411     if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != Ncols %D",n,Ncols);
3412 
3413     /* Check for special case: each processor gets entire matrix columns */
3414     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3415     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3416     if (allcolumns) {
3417       iscol_sub = iscol_local;
3418       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3419       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3420 
3421     } else {
3422       /* (2) iscol_local -> iscol_sub and iscmap */
3423       PetscInt *idx,*cmap1,k;
3424 
3425       /* the implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3426       ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr);
3427       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet");
3428 
3429       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3430       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3431       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3432       count = 0;
3433       k     = 0;
3434       for (i=0; i<Ncols; i++) {
3435         j = is_idx[i];
3436         if (j >= cstart && j < cend) {
3437           /* diagonal part of mat */
3438           idx[count]     = j;
3439           cmap1[count++] = i; /* column index in submat */
3440         } else if (Bn) {
3441           /* off-diagonal part of mat */
3442           if (j == garray[k]) {
3443             idx[count]     = j;
3444             cmap1[count++] = i;  /* column index in submat */
3445           } else if (j > garray[k]) {
3446             while (j > garray[k] && k < Bn-1) k++;
3447             if (j == garray[k]) {
3448               idx[count]     = j;
3449               cmap1[count++] = i; /* column index in submat */
3450             }
3451           }
3452         }
3453       }
3454       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3455 
3456       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3457       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3458       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3459 
3460       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3461     }
3462 
3463     /* (3) Create sequential Msub */
3464     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3465   }
3466 
3467   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3468   aij  = (Mat_SeqAIJ*)(Msub)->data;
3469   ii   = aij->i;
3470   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3471 
3472   /*
3473       m - number of local rows
3474       Ncols - number of columns (same on all processors)
3475       rstart - first row in new global matrix generated
3476   */
3477   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3478 
3479   if (call == MAT_INITIAL_MATRIX) {
3480     /* (4) Create parallel newmat */
3481     PetscMPIInt    rank,size;
3482     PetscInt       csize;
3483 
3484     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3485     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3486 
3487     /*
3488         Determine the number of non-zeros in the diagonal and off-diagonal
3489         portions of the matrix in order to do correct preallocation
3490     */
3491 
3492     /* first get start and end of "diagonal" columns */
3493     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3494     if (csize == PETSC_DECIDE) {
3495       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3496       if (mglobal == Ncols) { /* square matrix */
3497         nlocal = m;
3498       } else {
3499         nlocal = Ncols/size + ((Ncols % size) > rank);
3500       }
3501     } else {
3502       nlocal = csize;
3503     }
3504     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3505     rstart = rend - nlocal;
3506     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3507 
3508     /* next, compute all the lengths */
3509     jj    = aij->j;
3510     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3511     olens = dlens + m;
3512     for (i=0; i<m; i++) {
3513       jend = ii[i+1] - ii[i];
3514       olen = 0;
3515       dlen = 0;
3516       for (j=0; j<jend; j++) {
3517         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3518         else dlen++;
3519         jj++;
3520       }
3521       olens[i] = olen;
3522       dlens[i] = dlen;
3523     }
3524 
3525     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3526     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3527 
3528     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3529     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3530     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3531     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3532     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3533     ierr = PetscFree(dlens);CHKERRQ(ierr);
3534 
3535   } else { /* call == MAT_REUSE_MATRIX */
3536     M    = *newmat;
3537     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3538     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3539     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3540     /*
3541          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3542        rather than the slower MatSetValues().
3543     */
3544     M->was_assembled = PETSC_TRUE;
3545     M->assembled     = PETSC_FALSE;
3546   }
3547 
3548   /* (5) Set values of Msub to *newmat */
3549   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3550   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3551 
3552   jj   = aij->j;
3553   aa   = aij->a;
3554   for (i=0; i<m; i++) {
3555     row = rstart + i;
3556     nz  = ii[i+1] - ii[i];
3557     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3558     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3559     jj += nz; aa += nz;
3560   }
3561   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3562 
3563   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565 
3566   ierr = PetscFree(colsub);CHKERRQ(ierr);
3567 
3568   /* save Msub, iscol_sub and iscmap used in processor for next request */
3569   if (call ==  MAT_INITIAL_MATRIX) {
3570     *newmat = M;
3571     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3572     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3573 
3574     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3575     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3576 
3577     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3578     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3579 
3580     if (iscol_local) {
3581       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3582       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3583     }
3584   }
3585   PetscFunctionReturn(0);
3586 }
3587 
3588 /*
3589     Not great since it makes two copies of the submatrix: first a SeqAIJ
3590   locally, and then the end result by concatenating the local matrices.
3591   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3592 
3593   Note: This requires a sequential iscol with all indices.
3594 */
3595 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3596 {
3597   PetscErrorCode ierr;
3598   PetscMPIInt    rank,size;
3599   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3600   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3601   Mat            M,Mreuse;
3602   MatScalar      *aa,*vwork;
3603   MPI_Comm       comm;
3604   Mat_SeqAIJ     *aij;
3605   PetscBool      colflag,allcolumns=PETSC_FALSE;
3606 
3607   PetscFunctionBegin;
3608   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3609   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3610   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3611 
3612   /* Check for special case: each processor gets entire matrix columns */
3613   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3614   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3615   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3616 
3617   if (call ==  MAT_REUSE_MATRIX) {
3618     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3619     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3620     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3621   } else {
3622     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3623   }
3624 
3625   /*
3626       m - number of local rows
3627       n - number of columns (same on all processors)
3628       rstart - first row in new global matrix generated
3629   */
3630   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3631   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3632   if (call == MAT_INITIAL_MATRIX) {
3633     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3634     ii  = aij->i;
3635     jj  = aij->j;
3636 
3637     /*
3638         Determine the number of non-zeros in the diagonal and off-diagonal
3639         portions of the matrix in order to do correct preallocation
3640     */
3641 
3642     /* first get start and end of "diagonal" columns */
3643     if (csize == PETSC_DECIDE) {
3644       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3645       if (mglobal == n) { /* square matrix */
3646         nlocal = m;
3647       } else {
3648         nlocal = n/size + ((n % size) > rank);
3649       }
3650     } else {
3651       nlocal = csize;
3652     }
3653     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3654     rstart = rend - nlocal;
3655     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3656 
3657     /* next, compute all the lengths */
3658     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3659     olens = dlens + m;
3660     for (i=0; i<m; i++) {
3661       jend = ii[i+1] - ii[i];
3662       olen = 0;
3663       dlen = 0;
3664       for (j=0; j<jend; j++) {
3665         if (*jj < rstart || *jj >= rend) olen++;
3666         else dlen++;
3667         jj++;
3668       }
3669       olens[i] = olen;
3670       dlens[i] = dlen;
3671     }
3672     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3673     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3674     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3675     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3676     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3677     ierr = PetscFree(dlens);CHKERRQ(ierr);
3678   } else {
3679     PetscInt ml,nl;
3680 
3681     M    = *newmat;
3682     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3683     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3684     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3685     /*
3686          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3687        rather than the slower MatSetValues().
3688     */
3689     M->was_assembled = PETSC_TRUE;
3690     M->assembled     = PETSC_FALSE;
3691   }
3692   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3693   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3694   ii   = aij->i;
3695   jj   = aij->j;
3696   aa   = aij->a;
3697   for (i=0; i<m; i++) {
3698     row   = rstart + i;
3699     nz    = ii[i+1] - ii[i];
3700     cwork = jj;     jj += nz;
3701     vwork = aa;     aa += nz;
3702     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3703   }
3704 
3705   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3706   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   *newmat = M;
3708 
3709   /* save submatrix used in processor for next request */
3710   if (call ==  MAT_INITIAL_MATRIX) {
3711     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3712     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3713   }
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3718 {
3719   PetscInt       m,cstart, cend,j,nnz,i,d;
3720   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3721   const PetscInt *JJ;
3722   PetscScalar    *values;
3723   PetscErrorCode ierr;
3724   PetscBool      nooffprocentries;
3725 
3726   PetscFunctionBegin;
3727   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3728 
3729   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3730   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3731   m      = B->rmap->n;
3732   cstart = B->cmap->rstart;
3733   cend   = B->cmap->rend;
3734   rstart = B->rmap->rstart;
3735 
3736   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3737 
3738 #if defined(PETSC_USE_DEBUG)
3739   for (i=0; i<m; i++) {
3740     nnz = Ii[i+1]- Ii[i];
3741     JJ  = J + Ii[i];
3742     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3743     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3744     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3745   }
3745   }
3746 #endif
3747 
3748   for (i=0; i<m; i++) {
3749     nnz     = Ii[i+1]- Ii[i];
3750     JJ      = J + Ii[i];
3751     nnz_max = PetscMax(nnz_max,nnz);
3752     d       = 0;
3753     for (j=0; j<nnz; j++) {
3754       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3755     }
3756     d_nnz[i] = d;
3757     o_nnz[i] = nnz - d;
3758   }
3759   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3760   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3761 
3762   if (v) values = (PetscScalar*)v;
3763   else {
3764     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3765   }
3766 
3767   for (i=0; i<m; i++) {
3768     ii   = i + rstart;
3769     nnz  = Ii[i+1]- Ii[i];
3770     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3771   }
3772   nooffprocentries    = B->nooffprocentries;
3773   B->nooffprocentries = PETSC_TRUE;
3774   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   B->nooffprocentries = nooffprocentries;
3777 
3778   if (!v) {
3779     ierr = PetscFree(values);CHKERRQ(ierr);
3780   }
3781   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 /*@
3786    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3787    (the default parallel PETSc format).
3788 
3789    Collective on MPI_Comm
3790 
3791    Input Parameters:
3792 +  B - the matrix
3793 .  i - the indices into j for the start of each local row (starts with zero)
3794 .  j - the column indices for each local row (starts with zero)
3795 -  v - optional values in the matrix
3796 
3797    Level: developer
3798 
3799    Notes:
3800        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3801      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3802      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3803 
3804        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3805 
3806        The format used for the sparse matrix input is equivalent to a
3807     row-major ordering, i.e., for the following matrix the expected input data is
3808     as shown:
3809 
3810 $        1 0 0
3811 $        2 0 3     P0
3812 $       -------
3813 $        4 5 6     P1
3814 $
3815 $     Process0 [P0]: rows_owned=[0,1]
3816 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3817 $        j =  {0,0,2}  [size = 3]
3818 $        v =  {1,2,3}  [size = 3]
3819 $
3820 $     Process1 [P1]: rows_owned=[2]
3821 $        i =  {0,3}    [size = nrow+1  = 1+1]
3822 $        j =  {0,1,2}  [size = 3]
3823 $        v =  {4,5,6}  [size = 3]
3824 
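   For example, the matrix above could be assembled as follows (a sketch; nlocalrows and nlocalcols stand for the local sizes and are not part of the API):

$     Mat A;
$     MatCreate(comm,&A);
$     MatSetSizes(A,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);
$     MatSetType(A,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(A,i,j,v);
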
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3828           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*@C
3840    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3841    (the default parallel PETSc format).  For good matrix assembly performance
3842    the user should preallocate the matrix storage by setting the parameters
3843    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3844    performance can be increased by more than a factor of 50.
3845 
3846    Collective on MPI_Comm
3847 
3848    Input Parameters:
3849 +  B - the matrix
3850 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3851            (same value is used for all local rows)
3852 .  d_nnz - array containing the number of nonzeros in the various rows of the
3853            DIAGONAL portion of the local submatrix (possibly different for each row)
3854            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
3856            For matrices that will be factored, you must leave room for (and set)
3857            the diagonal entry even if it is zero.
3858 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3859            submatrix (same value is used for all local rows).
3860 -  o_nnz - array containing the number of nonzeros in the various rows of the
3861            OFF-DIAGONAL portion of the local submatrix (possibly different for
3862            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3863            structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.
3865 
3866    If the *_nnz parameter is given then the *_nz parameter is ignored
3867 
3868    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
3870    storage.  The stored row and column indices begin with zero.
3871    See Users-Manual: ch_mat for details.
3872 
3873    The parallel matrix is partitioned such that the first m0 rows belong to
3874    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3876 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3886 
3887    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3888 
3889    You can call MatGetInfo() to get information on how effective the preallocation was;
3890    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3891    You can also run with the option -info and look for messages with the string
3892    malloc in them to see if additional memory allocation was needed.
3893 
3894    Example usage:
3895 
3896    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3898    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3899    as follows:
3900 
3901 .vb
3902             1  2  0  |  0  3  0  |  0  4
3903     Proc0   0  5  6  |  7  0  0  |  8  0
3904             9  0 10  | 11  0  0  | 12  0
3905     -------------------------------------
3906            13  0 14  | 15 16 17  |  0  0
3907     Proc1   0 18  0  | 19 20 21  |  0  0
3908             0  0  0  | 22 23  0  | 24  0
3909     -------------------------------------
3910     Proc2  25 26 27  |  0  0 28  | 29  0
3911            30  0  0  | 31 32 33  |  0 34
3912 .ve
3913 
3914    This can be represented as a collection of submatrices as:
3915 
3916 .vb
3917       A B C
3918       D E F
3919       G H I
3920 .ve
3921 
3922    Where the submatrices A,B,C are owned by proc0, D,E,F are
3923    owned by proc1, G,H,I are owned by proc2.
3924 
3925    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3926    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3927    The 'M','N' parameters are 8,8, and have the same values on all procs.
3928 
3929    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3930    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3931    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3935 
3936    When d_nz, o_nz parameters are specified, d_nz storage elements are
3937    allocated for every row of the local diagonal submatrix, and o_nz
3938    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
3942 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3946 .ve
3947    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3948    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3950    34 values.
3951 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3954    In the above case the values for d_nnz,o_nnz are:
3955 .vb
3956      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3957      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3958      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3959 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is exact.
3962 
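   As a concrete sketch, proc1 in the example above could preallocate its rows with:

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
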
3963    Level: intermediate
3964 
3965 .keywords: matrix, aij, compressed row, sparse, parallel
3966 
3967 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3968           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3969 @*/
3970 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3971 {
3972   PetscErrorCode ierr;
3973 
3974   PetscFunctionBegin;
3975   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3976   PetscValidType(B,1);
3977   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 /*@
3982      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3983          CSR format the local rows.
3984 
3985    Collective on MPI_Comm
3986 
3987    Input Parameters:
3988 +  comm - MPI communicator
3989 .  m - number of local rows (Cannot be PETSC_DECIDE)
3990 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
3993 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3994 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3995 .   i - row indices
3996 .   j - column indices
3997 -   a - matrix values
3998 
3999    Output Parameter:
4000 .   mat - the matrix
4001 
4002    Level: intermediate
4003 
4004    Notes:
4005        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4006      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4007      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4008 
4009        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4010 
      The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the expected input data is
    as shown:
4014 
4015 $        1 0 0
4016 $        2 0 3     P0
4017 $       -------
4018 $        4 5 6     P1
4019 $
4020 $     Process0 [P0]: rows_owned=[0,1]
4021 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4022 $        j =  {0,0,2}  [size = 3]
4023 $        v =  {1,2,3}  [size = 3]
4024 $
4025 $     Process1 [P1]: rows_owned=[2]
4026 $        i =  {0,3}    [size = nrow+1  = 1+1]
4027 $        j =  {0,1,2}  [size = 3]
4028 $        v =  {4,5,6}  [size = 3]
4029 
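   For example, the layout above could be passed directly (a sketch; nlocalrows and nlocalcols stand for the local sizes and are not part of the API):

$     Mat A;
$     MatCreateMPIAIJWithArrays(comm,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);
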
4030 .keywords: matrix, aij, compressed row, sparse, parallel
4031 
4032 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4033           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4034 @*/
4035 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4036 {
4037   PetscErrorCode ierr;
4038 
4039   PetscFunctionBegin;
4040   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4042   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4043   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4044   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4045   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4046   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4047   PetscFunctionReturn(0);
4048 }
4049 
4050 /*@C
4051    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4052    (the default parallel PETSc format).  For good matrix assembly performance
4053    the user should preallocate the matrix storage by setting the parameters
4054    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4055    performance can be increased by more than a factor of 50.
4056 
4057    Collective on MPI_Comm
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4062            This value should be the same as the local size used in creating the
4063            y vector for the matrix-vector product y = Ax.
4064 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
4067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4069 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4070            (same value is used for all local rows)
4071 .  d_nnz - array containing the number of nonzeros in the various rows of the
4072            DIAGONAL portion of the local submatrix (possibly different for each row)
4073            or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
4075 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4076            submatrix (same value is used for all local rows).
4077 -  o_nnz - array containing the number of nonzeros in the various rows of the
4078            OFF-DIAGONAL portion of the local submatrix (possibly different for
4079            each row) or NULL, if o_nz is used to specify the nonzero
4080            structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.
4082 
4083    Output Parameter:
4084 .  A - the matrix
4085 
4086    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4088    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4089 
4090    Notes:
4091    If the *_nnz parameter is given then the *_nz parameter is ignored
4092 
4093    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4094    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4095    storage requirements for this matrix.
4096 
4097    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4099    that argument.
4100 
4101    The user MUST specify either the local or global matrix dimensions
4102    (possibly both).
4103 
4104    The parallel matrix is partitioned across processors such that the
4105    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
4109 
4110    The columns are logically partitioned with the n0 columns belonging
4111    to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4113 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by that processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below
   illustrates this concept.
4121 
4122    For a square global matrix we define each processor's diagonal portion
4123    to be its local rows and the corresponding columns (a square submatrix);
4124    each processor's off-diagonal portion encompasses the remainder of the
4125    local matrix (a rectangular submatrix).
4126 
4127    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4128 
4129    When calling this routine with a single process communicator, a matrix of
4130    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4131    type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve
4140 
4141    By default, this format uses inodes (identical nodes) when possible.
4142    We search for consecutive rows with the same nonzero structure, thereby
4143    reusing matrix information to achieve increased efficiency.
4144 
4145    Options Database Keys:
4146 +  -mat_no_inode  - Do not use inodes
4147 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4148 -  -mat_aij_oneindex - Internally use indexing starting at 1
4149         rather than 0.  Note that when calling MatSetValues(),
4150         the user still MUST index entries starting at 0!
4151 
4152 
4153    Example usage:
4154 
4155    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4157    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4158    as follows
4159 
4160 .vb
4161             1  2  0  |  0  3  0  |  0  4
4162     Proc0   0  5  6  |  7  0  0  |  8  0
4163             9  0 10  | 11  0  0  | 12  0
4164     -------------------------------------
4165            13  0 14  | 15 16 17  |  0  0
4166     Proc1   0 18  0  | 19 20 21  |  0  0
4167             0  0  0  | 22 23  0  | 24  0
4168     -------------------------------------
4169     Proc2  25 26 27  |  0  0 28  | 29  0
4170            30  0  0  | 31 32 33  |  0 34
4171 .ve
4172 
4173    This can be represented as a collection of submatrices as
4174 
4175 .vb
4176       A B C
4177       D E F
4178       G H I
4179 .ve
4180 
4181    Where the submatrices A,B,C are owned by proc0, D,E,F are
4182    owned by proc1, G,H,I are owned by proc2.
4183 
4184    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4185    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4186    The 'M','N' parameters are 8,8, and have the same values on all procs.
4187 
4188    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4189    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4190    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4194 
4195    When d_nz, o_nz parameters are specified, d_nz storage elements are
4196    allocated for every row of the local diagonal submatrix, and o_nz
4197    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
4201 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4205 .ve
4206    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4207    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4209    34 values.
4210 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4213    In the above case the values for d_nnz,o_nnz are
4214 .vb
4215      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4216      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4217      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4218 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is exact.
4221 
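   As a concrete sketch, proc1 in the example above could create its share of the matrix with:

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
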
4222    Level: intermediate
4223 
4224 .keywords: matrix, aij, compressed row, sparse, parallel
4225 
4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4227           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4228 @*/
4229 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4230 {
4231   PetscErrorCode ierr;
4232   PetscMPIInt    size;
4233 
4234   PetscFunctionBegin;
4235   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4236   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4238   if (size > 1) {
4239     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4240     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4241   } else {
4242     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4243     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4244   }
4245   PetscFunctionReturn(0);
4246 }
4247 
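/* Returns references (not copies) to the diagonal block Ad, the off-diagonal block Ao, and the map colmap
   from Ao's local column indices to global column indices of a MATMPIAIJ matrix */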
4248 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4249 {
4250   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4251   PetscBool      flg;
4252   PetscErrorCode ierr;
4253 
4254   PetscFunctionBegin;
4255   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4256   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4257   if (Ad)     *Ad     = a->A;
4258   if (Ao)     *Ao     = a->B;
4259   if (colmap) *colmap = a->garray;
4260   PetscFunctionReturn(0);
4261 }
4262 
4263 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4264 {
4265   PetscErrorCode ierr;
4266   PetscInt       m,N,i,rstart,nnz,Ii;
4267   PetscInt       *indx;
4268   PetscScalar    *values;
4269 
4270   PetscFunctionBegin;
4271   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4272   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4273     PetscInt       *dnz,*onz,sum,bs,cbs;
4274 
4275     if (n == PETSC_DECIDE) {
4276       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4277     }
4278     /* Check sum(n) = N */
4279     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4280     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4281 
4282     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4283     rstart -= m;
4284 
4285     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4286     for (i=0; i<m; i++) {
4287       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4288       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4289       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4290     }
4291 
4292     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4293     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4294     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4295     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4296     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4297     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4298     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4299     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4300   }
4301 
4302   /* numeric phase */
4303   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4304   for (i=0; i<m; i++) {
4305     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4306     Ii   = i + rstart;
4307     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4308     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4309   }
4310   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4311   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4312   PetscFunctionReturn(0);
4313 }
4314 
4315 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4316 {
4317   PetscErrorCode    ierr;
4318   PetscMPIInt       rank;
4319   PetscInt          m,N,i,rstart,nnz;
4320   size_t            len;
4321   const PetscInt    *indx;
4322   PetscViewer       out;
4323   char              *name;
4324   Mat               B;
4325   const PetscScalar *values;
4326 
4327   PetscFunctionBegin;
4328   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4329   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4330   /* Should this be the type of the diagonal block of A? */
4331   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4332   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4333   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4334   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4335   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4336   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4337   for (i=0; i<m; i++) {
4338     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4339     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4340     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4341   }
4342   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4343   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4344 
4345   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4346   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* leave room for "." plus the rank digits */
4348   sprintf(name,"%s.%d",outfile,rank);
4349   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4350   ierr = PetscFree(name);CHKERRQ(ierr);
4351   ierr = MatView(B,out);CHKERRQ(ierr);
4352   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4353   ierr = MatDestroy(&B);CHKERRQ(ierr);
4354   PetscFunctionReturn(0);
4355 }
4356 
4357 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4358 {
4359   PetscErrorCode      ierr;
4360   Mat_Merge_SeqsToMPI *merge;
4361   PetscContainer      container;
4362 
4363   PetscFunctionBegin;
4364   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4365   if (container) {
4366     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4367     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4368     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4369     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4370     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4371     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4372     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4373     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4374     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4375     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4376     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4377     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4378     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4379     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4380     ierr = PetscFree(merge);CHKERRQ(ierr);
4381     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4382   }
4383   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #include <../src/mat/utils/freespace.h>
4388 #include <petscbt.h>
4389 
4390 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4391 {
4392   PetscErrorCode      ierr;
4393   MPI_Comm            comm;
4394   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4395   PetscMPIInt         size,rank,taga,*len_s;
4396   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4397   PetscInt            proc,m;
4398   PetscInt            **buf_ri,**buf_rj;
4399   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4400   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4401   MPI_Request         *s_waits,*r_waits;
4402   MPI_Status          *status;
4403   MatScalar           *aa=a->a;
4404   MatScalar           **abuf_r,*ba_i;
4405   Mat_Merge_SeqsToMPI *merge;
4406   PetscContainer      container;
4407 
4408   PetscFunctionBegin;
4409   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4410   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4411 
4412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4413   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4414 
4415   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4416   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4417 
4418   bi     = merge->bi;
4419   bj     = merge->bj;
4420   buf_ri = merge->buf_ri;
4421   buf_rj = merge->buf_rj;
4422 
4423   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4424   owners = merge->rowmap->range;
4425   len_s  = merge->len_s;
4426 
4427   /* send and recv matrix values */
4428   /*-----------------------------*/
4429   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4430   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4431 
4432   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4433   for (proc=0,k=0; proc<size; proc++) {
4434     if (!len_s[proc]) continue;
4435     i    = owners[proc];
4436     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4437     k++;
4438   }
4439 
4440   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4441   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4442   ierr = PetscFree(status);CHKERRQ(ierr);
4443 
4444   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4445   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4446 
4447   /* insert mat values of mpimat */
4448   /*----------------------------*/
4449   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4450   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4451 
4452   for (k=0; k<merge->nrecv; k++) {
4453     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4454     nrows       = *(buf_ri_k[k]);
4455     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4457   }
4458 
4459   /* set values of ba */
4460   m = merge->rowmap->n;
4461   for (i=0; i<m; i++) {
4462     arow = owners[rank] + i;
4463     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4464     bnzi = bi[i+1] - bi[i];
4465     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4466 
4467     /* add local non-zero vals of this proc's seqmat into ba */
4468     anzi   = ai[arow+1] - ai[arow];
4469     aj     = a->j + ai[arow];
4470     aa     = a->a + ai[arow];
4471     nextaj = 0;
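    /* two-pointer merge: bj_i and aj are both sorted, so scan bj_i and add each matching entry of aj */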
4472     for (j=0; nextaj<anzi; j++) {
4473       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4474         ba_i[j] += aa[nextaj++];
4475       }
4476     }
4477 
4478     /* add received vals into ba */
4479     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4480       /* i-th row */
4481       if (i == *nextrow[k]) {
4482         anzi   = *(nextai[k]+1) - *nextai[k];
4483         aj     = buf_rj[k] + *(nextai[k]);
4484         aa     = abuf_r[k] + *(nextai[k]);
4485         nextaj = 0;
4486         for (j=0; nextaj<anzi; j++) {
4487           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4488             ba_i[j] += aa[nextaj++];
4489           }
4490         }
4491         nextrow[k]++; nextai[k]++;
4492       }
4493     }
4494     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4495   }
4496   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4497   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4498 
4499   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4500   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4501   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4502   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4503   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4508 {
4509   PetscErrorCode      ierr;
4510   Mat                 B_mpi;
4511   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4512   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4513   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4514   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4515   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4516   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4517   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4518   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4519   MPI_Status          *status;
4520   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4521   PetscBT             lnkbt;
4522   Mat_Merge_SeqsToMPI *merge;
4523   PetscContainer      container;
4524 
4525   PetscFunctionBegin;
4526   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4527 
4528   /* make sure it is a PETSc comm */
4529   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4531   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4532 
4533   ierr = PetscNew(&merge);CHKERRQ(ierr);
4534   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4535 
4536   /* determine row ownership */
4537   /*---------------------------------------------------------*/
4538   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4539   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4540   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4541   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4542   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4543   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4544   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4545 
4546   m      = merge->rowmap->n;
4547   owners = merge->rowmap->range;
4548 
4549   /* determine the number of messages to send, their lengths */
4550   /*---------------------------------------------------------*/
4551   len_s = merge->len_s;
4552 
4553   len          = 0; /* length of buf_si[] */
4554   merge->nsend = 0;
4555   for (proc=0; proc<size; proc++) {
4556     len_si[proc] = 0;
4557     if (proc == rank) {
4558       len_s[proc] = 0;
4559     } else {
4560       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4561       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4562     }
4563     if (len_s[proc]) {
4564       merge->nsend++;
4565       nrows = 0;
4566       for (i=owners[proc]; i<owners[proc+1]; i++) {
4567         if (ai[i+1] > ai[i]) nrows++;
4568       }
4569       len_si[proc] = 2*(nrows+1);
4570       len         += len_si[proc];
4571     }
4572   }
4573 
4574   /* determine the number and length of messages to receive for ij-structure */
4575   /*-------------------------------------------------------------------------*/
4576   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4577   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4578 
4579   /* post the Irecv of j-structure */
4580   /*-------------------------------*/
4581   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4582   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4583 
4584   /* post the Isend of j-structure */
4585   /*--------------------------------*/
4586   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4587 
4588   for (proc=0, k=0; proc<size; proc++) {
4589     if (!len_s[proc]) continue;
4590     i    = owners[proc];
4591     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4592     k++;
4593   }
4594 
4595   /* receives and sends of j-structure are complete */
4596   /*------------------------------------------------*/
4597   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4598   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4599 
4600   /* send and recv i-structure */
4601   /*---------------------------*/
4602   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4603   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4604 
4605   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4606   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4607   for (proc=0,k=0; proc<size; proc++) {
4608     if (!len_s[proc]) continue;
4609     /* form outgoing message for i-structure:
4610          buf_si[0]:                 nrows to be sent
4611                [1:nrows]:           row index (global)
4612                [nrows+1:2*nrows+1]: i-structure index
4613     */
4614     /*-------------------------------------------*/
4615     nrows       = len_si[proc]/2 - 1;
4616     buf_si_i    = buf_si + nrows+1;
4617     buf_si[0]   = nrows;
4618     buf_si_i[0] = 0;
4619     nrows       = 0;
4620     for (i=owners[proc]; i<owners[proc+1]; i++) {
4621       anzi = ai[i+1] - ai[i];
4622       if (anzi) {
4623         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4624         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4625         nrows++;
4626       }
4627     }
4628     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4629     k++;
4630     buf_si += len_si[proc];
4631   }
4632 
4633   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4634   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4635 
4636   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4637   for (i=0; i<merge->nrecv; i++) {
4638     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4639   }
4640 
4641   ierr = PetscFree(len_si);CHKERRQ(ierr);
4642   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4643   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4644   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4645   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4646   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4647   ierr = PetscFree(status);CHKERRQ(ierr);
4648 
4649   /* compute a local seq matrix in each processor */
4650   /*----------------------------------------------*/
4651   /* allocate bi array and free space for accumulating nonzero column info */
4652   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4653   bi[0] = 0;
4654 
4655   /* create and initialize a linked list */
4656   nlnk = N+1;
4657   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4658 
4659   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4660   len  = ai[owners[rank+1]] - ai[owners[rank]];
4661   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4662 
4663   current_space = free_space;
4664 
4665   /* determine symbolic info for each local row */
4666   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4667 
4668   for (k=0; k<merge->nrecv; k++) {
4669     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4670     nrows       = *buf_ri_k[k];
4671     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th recved i-structure */
4673   }
4674 
4675   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4676   len  = 0;
4677   for (i=0; i<m; i++) {
4678     bnzi = 0;
4679     /* add local non-zero cols of this proc's seqmat into lnk */
4680     arow  = owners[rank] + i;
4681     anzi  = ai[arow+1] - ai[arow];
4682     aj    = a->j + ai[arow];
4683     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4684     bnzi += nlnk;
4685     /* add received col data into lnk */
4686     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4687       if (i == *nextrow[k]) { /* i-th row */
4688         anzi  = *(nextai[k]+1) - *nextai[k];
4689         aj    = buf_rj[k] + *nextai[k];
4690         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4691         bnzi += nlnk;
4692         nextrow[k]++; nextai[k]++;
4693       }
4694     }
4695     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4696 
4697     /* if free space is not available, make more free space */
4698     if (current_space->local_remaining<bnzi) {
4699       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4700       nspacedouble++;
4701     }
4702     /* copy data into free space, then initialize lnk */
4703     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4704     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4705 
4706     current_space->array           += bnzi;
4707     current_space->local_used      += bnzi;
4708     current_space->local_remaining -= bnzi;
4709 
4710     bi[i+1] = bi[i] + bnzi;
4711   }
4712 
4713   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4714 
4715   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4716   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4717   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4718 
4719   /* create symbolic parallel matrix B_mpi */
4720   /*---------------------------------------*/
4721   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4722   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4723   if (n==PETSC_DECIDE) {
4724     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4725   } else {
4726     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4727   }
4728   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4729   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4730   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4731   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4732   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4733 
4734   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4735   B_mpi->assembled    = PETSC_FALSE;
4736   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4737   merge->bi           = bi;
4738   merge->bj           = bj;
4739   merge->buf_ri       = buf_ri;
4740   merge->buf_rj       = buf_rj;
4741   merge->coi          = NULL;
4742   merge->coj          = NULL;
4743   merge->owners_co    = NULL;
4744 
4745   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4746 
4747   /* attach the supporting struct to B_mpi for reuse */
4748   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4749   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4750   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4751   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4752   *mpimat = B_mpi;
4753 
4754   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4755   PetscFunctionReturn(0);
4756 }
4757 
4758 /*@C
4759       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4760                  matrices from each processor
4761 
4762     Collective on MPI_Comm
4763 
4764    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4767 .    m - number of local rows (or PETSC_DECIDE)
4768 .    n - number of local columns (or PETSC_DECIDE)
4769 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4770 
4771    Output Parameter:
4772 .    mpimat - the parallel matrix generated
4773 
4774     Level: advanced
4775 
4776    Notes:
4777      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4779      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
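
     A typical call sequence is sketched below; each process first assembles its own seqmat of identical
     global dimensions:

$     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
$     /* ... change the numerical values of seqmat, keeping the same nonzero pattern ... */
$     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);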
4780 @*/
4781 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4782 {
4783   PetscErrorCode ierr;
4784   PetscMPIInt    size;
4785 
4786   PetscFunctionBegin;
4787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4788   if (size == 1) {
4789     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4790     if (scall == MAT_INITIAL_MATRIX) {
4791       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4792     } else {
4793       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4794     }
4795     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4796     PetscFunctionReturn(0);
4797   }
4798   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4799   if (scall == MAT_INITIAL_MATRIX) {
4800     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4801   }
4802   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4803   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4804   PetscFunctionReturn(0);
4805 }
4806 
4807 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().
4811 
4812     Not Collective
4813 
4814    Input Parameters:
4815 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4817 
4818    Output Parameter:
4819 .    A_loc - the local sequential matrix generated
4820 
4821     Level: developer
4822 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4824 
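   Usage sketch (with MAT_INITIAL_MATRIX the caller destroys A_loc when done):

$     Mat Aloc;
$     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
$     /* ... use Aloc ... */
$     MatDestroy(&Aloc);
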
4825 @*/
4826 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4827 {
4828   PetscErrorCode ierr;
4829   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4830   Mat_SeqAIJ     *mat,*a,*b;
4831   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4832   MatScalar      *aa,*ba,*cam;
4833   PetscScalar    *ca;
4834   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4835   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4836   PetscBool      match;
4837   MPI_Comm       comm;
4838   PetscMPIInt    size;
4839 
4840   PetscFunctionBegin;
4841   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4842   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4843   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4844   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4845   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4846 
4847   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4848   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4849   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4850   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4851   aa = a->a; ba = b->a;
4852   if (scall == MAT_INITIAL_MATRIX) {
4853     if (size == 1) {
4854       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4855       PetscFunctionReturn(0);
4856     }
4857 
4858     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4859     ci[0] = 0;
4860     for (i=0; i<am; i++) {
4861       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4862     }
4863     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4864     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4865     k    = 0;
4866     for (i=0; i<am; i++) {
4867       ncols_o = bi[i+1] - bi[i];
4868       ncols_d = ai[i+1] - ai[i];
4869       /* off-diagonal portion of A */
4870       for (jo=0; jo<ncols_o; jo++) {
4871         col = cmap[*bj];
4872         if (col >= cstart) break;
4873         cj[k]   = col; bj++;
4874         ca[k++] = *ba++;
4875       }
4876       /* diagonal portion of A */
4877       for (j=0; j<ncols_d; j++) {
4878         cj[k]   = cstart + *aj++;
4879         ca[k++] = *aa++;
4880       }
4881       /* off-diagonal portion of A */
4882       for (j=jo; j<ncols_o; j++) {
4883         cj[k]   = cmap[*bj++];
4884         ca[k++] = *ba++;
4885       }
4886     }
4887     /* put together the new matrix */
4888     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4889     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4890     /* Since these are PETSc arrays, change flags to free them as necessary. */
4891     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4892     mat->free_a  = PETSC_TRUE;
4893     mat->free_ij = PETSC_TRUE;
4894     mat->nonew   = 0;
4895   } else if (scall == MAT_REUSE_MATRIX) {
4896     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4897     ci = mat->i; cj = mat->j; cam = mat->a;
4898     for (i=0; i<am; i++) {
4899       /* off-diagonal portion of A */
4900       ncols_o = bi[i+1] - bi[i];
4901       for (jo=0; jo<ncols_o; jo++) {
4902         col = cmap[*bj];
4903         if (col >= cstart) break;
4904         *cam++ = *ba++; bj++;
4905       }
4906       /* diagonal portion of A */
4907       ncols_d = ai[i+1] - ai[i];
4908       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4909       /* off-diagonal portion of A */
4910       for (j=jo; j<ncols_o; j++) {
4911         *cam++ = *ba++; bj++;
4912       }
4913     }
4914   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4915   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4921 
4922     Not Collective
4923 
4924    Input Parameters:
4925 +    A - the matrix
4926 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 -    row, col - index sets of rows and columns to extract (or NULL)
4928 
4929    Output Parameter:
4930 .    A_loc - the local sequential matrix generated
4931 
4932     Level: developer
4933 
4934 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
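   Usage sketch (pass NULL for row and col to take all local rows and all nonzero columns):

$     Mat Aloc;
$     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
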
4935 
4936 @*/
4937 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4938 {
4939   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4940   PetscErrorCode ierr;
4941   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4942   IS             isrowa,iscola;
4943   Mat            *aloc;
4944   PetscBool      match;
4945 
4946   PetscFunctionBegin;
4947   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4948   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4949   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4950   if (!row) {
4951     start = A->rmap->rstart; end = A->rmap->rend;
4952     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4953   } else {
4954     isrowa = *row;
4955   }
4956   if (!col) {
4957     start = A->cmap->rstart;
4958     cmap  = a->garray;
4959     nzA   = a->A->cmap->n;
4960     nzB   = a->B->cmap->n;
4961     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4962     ncols = 0;
4963     for (i=0; i<nzB; i++) {
4964       if (cmap[i] < start) idx[ncols++] = cmap[i];
4965       else break;
4966     }
4967     imark = i;
4968     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4969     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4970     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4971   } else {
4972     iscola = *col;
4973   }
4974   if (scall != MAT_INITIAL_MATRIX) {
4975     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4976     aloc[0] = *A_loc;
4977   }
4978   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4979   *A_loc = aloc[0];
4980   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4981   if (!row) {
4982     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4983   }
4984   if (!col) {
4985     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4986   }
4987   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4993 
4994     Collective on Mat
4995 
4996    Input Parameters:
4997 +    A,B - the matrices in mpiaij format
4998 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4999 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5000 
5001    Output Parameter:
5002 +    rowb, colb - index sets of rows and columns of B to extract
5003 -    B_seq - the sequential matrix generated
5004 
5005     Level: developer
5006 
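   Usage sketch; on the first call the index sets are created and returned so they can be reused:

$     IS  rowb,colb;
$     Mat Bseq;
$     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
$     /* ... after the numerical values of B change (same nonzero pattern) ... */
$     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
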
5007 @*/
5008 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5009 {
5010   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5011   PetscErrorCode ierr;
5012   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5013   IS             isrowb,iscolb;
5014   Mat            *bseq=NULL;
5015 
5016   PetscFunctionBegin;
5017   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5018     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5019   }
5020   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5021 
5022   if (scall == MAT_INITIAL_MATRIX) {
5023     start = A->cmap->rstart;
5024     cmap  = a->garray;
5025     nzA   = a->A->cmap->n;
5026     nzB   = a->B->cmap->n;
5027     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5028     ncols = 0;
5029     for (i=0; i<nzB; i++) {  /* row < local row index */
5030       if (cmap[i] < start) idx[ncols++] = cmap[i];
5031       else break;
5032     }
5033     imark = i;
5034     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5035     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5036     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5037     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5038   } else {
5039     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5040     isrowb  = *rowb; iscolb = *colb;
5041     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5042     bseq[0] = *B_seq;
5043   }
5044   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5045   *B_seq = bseq[0];
5046   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5047   if (!rowb) {
5048     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5049   } else {
5050     *rowb = isrowb;
5051   }
5052   if (!colb) {
5053     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5054   } else {
5055     *colb = iscolb;
5056   }
5057   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5058   PetscFunctionReturn(0);
5059 }
5060 
5061 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
5064 
5065     Collective on Mat
5066 
5067    Input Parameters:
5068 +    A,B - the matrices in mpiaij format
5069 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5070 
5071    Output Parameter:
5072 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5073 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5074 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5075 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5076 
5077     Level: developer
5078 
5079 */
5080 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5081 {
5082   VecScatter_MPI_General *gen_to,*gen_from;
5083   PetscErrorCode         ierr;
5084   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5085   Mat_SeqAIJ             *b_oth;
5086   VecScatter             ctx =a->Mvctx;
5087   MPI_Comm               comm;
5088   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5089   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5090   PetscInt               *rvalues,*svalues;
5091   MatScalar              *b_otha,*bufa,*bufA;
5092   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5093   MPI_Request            *rwaits = NULL,*swaits = NULL;
5094   MPI_Status             *sstatus,rstatus;
5095   PetscMPIInt            jj,size;
5096   PetscInt               *cols,sbs,rbs;
5097   PetscScalar            *vals;
5098 
5099   PetscFunctionBegin;
5100   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5102 
5103   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5104     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5105   }
5106   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5108 
5109   if (size == 1) {
5110     startsj_s = NULL;
5111     bufa_ptr  = NULL;
5112     *B_oth    = NULL;
5113     PetscFunctionReturn(0);
5114   }
5115 
5116   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5117   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5118   nrecvs   = gen_from->n;
5119   nsends   = gen_to->n;
5120 
5121   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5122   srow    = gen_to->indices;    /* local row index to be sent */
5123   sstarts = gen_to->starts;
5124   sprocs  = gen_to->procs;
5125   sstatus = gen_to->sstatus;
5126   sbs     = gen_to->bs;
5127   rstarts = gen_from->starts;
5128   rprocs  = gen_from->procs;
5129   rbs     = gen_from->bs;
5130 
5131   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5132   if (scall == MAT_INITIAL_MATRIX) {
5133     /* i-array */
5134     /*---------*/
5135     /*  post receives */
5136     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5137     for (i=0; i<nrecvs; i++) {
5138       rowlen = rvalues + rstarts[i]*rbs;
5139       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5140       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5141     }
5142 
5143     /* pack the outgoing message */
5144     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5145 
5146     sstartsj[0] = 0;
5147     rstartsj[0] = 0;
5148     len         = 0; /* total length of j or a array to be sent */
5149     k           = 0;
5150     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5151     for (i=0; i<nsends; i++) {
5152       rowlen = svalues + sstarts[i]*sbs;
5153       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5154       for (j=0; j<nrows; j++) {
5155         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5156         for (l=0; l<sbs; l++) {
5157           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5158 
5159           rowlen[j*sbs+l] = ncols;
5160 
5161           len += ncols;
5162           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5163         }
5164         k++;
5165       }
5166       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5167 
5168       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5169     }
5170     /* recvs and sends of i-array are completed */
5171     i = nrecvs;
5172     while (i--) {
5173       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5174     }
5175     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5176     ierr = PetscFree(svalues);CHKERRQ(ierr);
5177 
5178     /* allocate buffers for sending j and a arrays */
5179     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5180     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5181 
5182     /* create i-array of B_oth */
5183     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5184 
5185     b_othi[0] = 0;
5186     len       = 0; /* total length of j or a array to be received */
5187     k         = 0;
5188     for (i=0; i<nrecvs; i++) {
5189       rowlen = rvalues + rstarts[i]*rbs;
5190       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5191       for (j=0; j<nrows; j++) {
5192         b_othi[k+1] = b_othi[k] + rowlen[j];
5193         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5194         k++;
5195       }
5196       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5197     }
5198     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5199 
5200     /* allocate space for j and a arrays of B_oth */
5201     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5202     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5203 
5204     /* j-array */
5205     /*---------*/
5206     /*  post receives of j-array */
5207     for (i=0; i<nrecvs; i++) {
5208       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5209       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5210     }
5211 
5212     /* pack the outgoing message j-array */
5213     k = 0;
5214     for (i=0; i<nsends; i++) {
5215       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5216       bufJ  = bufj+sstartsj[i];
5217       for (j=0; j<nrows; j++) {
5218         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5219         for (ll=0; ll<sbs; ll++) {
5220           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5221           for (l=0; l<ncols; l++) {
5222             *bufJ++ = cols[l];
5223           }
5224           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5225         }
5226       }
5227       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5228     }
5229 
5230     /* recvs and sends of j-array are completed */
5231     i = nrecvs;
5232     while (i--) {
5233       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5234     }
5235     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5236   } else if (scall == MAT_REUSE_MATRIX) {
5237     sstartsj = *startsj_s;
5238     rstartsj = *startsj_r;
5239     bufa     = *bufa_ptr;
5240     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5241     b_otha   = b_oth->a;
5242   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5243 
5244   /* a-array */
5245   /*---------*/
5246   /*  post receives of a-array */
5247   for (i=0; i<nrecvs; i++) {
5248     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5249     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5250   }
5251 
5252   /* pack the outgoing message a-array */
5253   k = 0;
5254   for (i=0; i<nsends; i++) {
5255     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5256     bufA  = bufa+sstartsj[i];
5257     for (j=0; j<nrows; j++) {
5258       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5259       for (ll=0; ll<sbs; ll++) {
5260         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5261         for (l=0; l<ncols; l++) {
5262           *bufA++ = vals[l];
5263         }
5264         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5265       }
5266     }
5267     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5268   }
5269   /* recvs and sends of a-array are completed */
5270   i = nrecvs;
5271   while (i--) {
5272     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5273   }
5274   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5275   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5276 
5277   if (scall == MAT_INITIAL_MATRIX) {
5278     /* put together the new matrix */
5279     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5280 
5281     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5282     /* Since these are PETSc arrays, change flags to free them as necessary. */
5283     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5284     b_oth->free_a  = PETSC_TRUE;
5285     b_oth->free_ij = PETSC_TRUE;
5286     b_oth->nonew   = 0;
5287 
5288     ierr = PetscFree(bufj);CHKERRQ(ierr);
5289     if (!startsj_s || !bufa_ptr) {
5290       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5291       ierr = PetscFree(bufa);CHKERRQ(ierr);
5292     } else {
5293       *startsj_s = sstartsj;
5294       *startsj_r = rstartsj;
5295       *bufa_ptr  = bufa;
5296     }
5297   }
5298   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5299   PetscFunctionReturn(0);
5300 }
5301 
5302 /*@C
5303   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5304 
5305   Not Collective
5306 
5307   Input Parameter:
5308 . A - The matrix in mpiaij format
5309 
5310   Output Parameters:
5311 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5312 . colmap - A map from global column index to local index into lvec
5313 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5314 
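  Sample usage (an illustrative sketch; it assumes a build without PETSC_USE_CTABLE, so colmap is a PetscInt array):
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
  The returned objects are owned by the matrix and must not be destroyed by the caller.
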
5315   Level: developer
5316 
5317 @*/
5318 #if defined(PETSC_USE_CTABLE)
5319 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5320 #else
5321 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5322 #endif
5323 {
5324   Mat_MPIAIJ *a;
5325 
5326   PetscFunctionBegin;
5327   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5328   PetscValidPointer(lvec, 2);
5329   PetscValidPointer(colmap, 3);
5330   PetscValidPointer(multScatter, 4);
5331   a = (Mat_MPIAIJ*) A->data;
5332   if (lvec) *lvec = a->lvec;
5333   if (colmap) *colmap = a->colmap;
5334   if (multScatter) *multScatter = a->Mvctx;
5335   PetscFunctionReturn(0);
5336 }
5337 
5338 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5339 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5340 #if defined(PETSC_HAVE_MKL_SPARSE)
5341 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5342 #endif
5343 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5344 #if defined(PETSC_HAVE_ELEMENTAL)
5345 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5346 #endif
5347 #if defined(PETSC_HAVE_HYPRE)
5348 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5349 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5350 #endif
5351 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5352 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIELL(Mat,MatType,MatReuse,Mat*);
5353 
5354 /*
5355     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5356 
5357                n                       p                          p
5358         (              )       (              )         (                  )
5359       m (      A       )  *  n (       B      )   =   m (         C        )
5360         (              )       (              )         (                  )
5361 
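    For reference, this path is reached through the generic interface (an illustrative sketch),

      ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

    with A_dense of type MATMPIDENSE and B_aij of type MATMPIAIJ.
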
5362 */
5363 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5364 {
5365   PetscErrorCode ierr;
5366   Mat            At,Bt,Ct;
5367 
5368   PetscFunctionBegin;
5369   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5370   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5371   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5372   ierr = MatDestroy(&At);CHKERRQ(ierr);
5373   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5374   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5375   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5376   PetscFunctionReturn(0);
5377 }
5378 
5379 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5380 {
5381   PetscErrorCode ierr;
5382   PetscInt       m=A->rmap->n,n=B->cmap->n;
5383   Mat            Cmat;
5384 
5385   PetscFunctionBegin;
5386   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5387   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5388   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5389   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5390   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5391   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5392   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5393   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5394 
5395   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5396 
5397   *C = Cmat;
5398   PetscFunctionReturn(0);
5399 }
5400 
5401 /* ----------------------------------------------------------------*/
5402 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5403 {
5404   PetscErrorCode ierr;
5405 
5406   PetscFunctionBegin;
5407   if (scall == MAT_INITIAL_MATRIX) {
5408     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5409     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5410     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5411   }
5412   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5413   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5414   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5415   PetscFunctionReturn(0);
5416 }
5417 
5418 /*MC
5419    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5420 
5421    Options Database Keys:
5422 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5423 
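   Typical construction (an illustrative sketch; comm, m, n, M, N, d_nz and o_nz are placeholder values
   supplied by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
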
5424   Level: beginner
5425 
5426 .seealso: MatCreateAIJ()
5427 M*/
5428 
5429 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5430 {
5431   Mat_MPIAIJ     *b;
5432   PetscErrorCode ierr;
5433   PetscMPIInt    size;
5434 
5435   PetscFunctionBegin;
5436   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5437 
5438   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5439   B->data       = (void*)b;
5440   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5441   B->assembled  = PETSC_FALSE;
5442   B->insertmode = NOT_SET_VALUES;
5443   b->size       = size;
5444 
5445   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5446 
5447   /* build cache for off array entries formed */
5448   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5449 
5450   b->donotstash  = PETSC_FALSE;
5451   b->colmap      = 0;
5452   b->garray      = 0;
5453   b->roworiented = PETSC_TRUE;
5454 
5455   /* stuff used for matrix vector multiply */
5456   b->lvec  = NULL;
5457   b->Mvctx = NULL;
5458 
5459   /* stuff for MatGetRow() */
5460   b->rowindices   = 0;
5461   b->rowvalues    = 0;
5462   b->getrowactive = PETSC_FALSE;
5463 
5464   /* flexible pointer used in CUSP/CUSPARSE classes */
5465   b->spptr = NULL;
5466 
5467   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5468   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5469   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5470   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5471   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5472   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5473   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5474   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5475 #if defined(PETSC_HAVE_MKL_SPARSE)
5476   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5477 #endif
5478   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5479   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5480 #if defined(PETSC_HAVE_ELEMENTAL)
5481   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5482 #endif
5483 #if defined(PETSC_HAVE_HYPRE)
5484   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5485 #endif
5486   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5487   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiell_C",MatConvert_MPIAIJ_MPIELL);CHKERRQ(ierr);
5488   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5489   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5490   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5491 #if defined(PETSC_HAVE_HYPRE)
5492   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5493 #endif
5494   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5495   PetscFunctionReturn(0);
5496 }
5497 
5498 /*@C
5499      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5500          and "off-diagonal" part of the matrix in CSR format.
5501 
5502    Collective on MPI_Comm
5503 
5504    Input Parameters:
5505 +  comm - MPI communicator
5506 .  m - number of local rows (Cannot be PETSC_DECIDE)
5507 .  n - This value should be the same as the local size used in creating the
5508        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5509        calculated if N is given). For square matrices n is almost always m.
5510 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5511 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5512 .   i - row indices for "diagonal" portion of matrix
5513 .   j - column indices
5514 .   a - matrix values
5515 .   oi - row indices for "off-diagonal" portion of matrix
5516 .   oj - column indices
5517 -   oa - matrix values
5518 
5519    Output Parameter:
5520 .   mat - the matrix
5521 
5522    Level: advanced
5523 
5524    Notes:
5525        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5526        must free the arrays once the matrix has been destroyed and not before.
5527 
5528        The i and j indices are 0 based
5529 
5530        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5531 
5532        This sets local rows and cannot be used to set off-processor values.
5533 
5534        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5535        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5536        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5537        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5538        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5539        communication if it is known that only local entries will be set.
5540 
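   Example (an illustrative sketch for a 2x2 matrix over two processes with made-up values; it assumes the
   off-diagonal column indices oj are global, and all arrays must remain valid until the matrix is destroyed):
.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    i[2] = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
   PetscScalar a[1] = {2.0},oa[1] = {-1.0};
   ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   oj[0] = rank ? 0 : 1;
   ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
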
5541 .keywords: matrix, aij, compressed row, sparse, parallel
5542 
5543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5544           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5545 @*/
5546 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5547 {
5548   PetscErrorCode ierr;
5549   Mat_MPIAIJ     *maij;
5550 
5551   PetscFunctionBegin;
5552   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5553   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5554   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5555   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5556   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5557   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5558   maij = (Mat_MPIAIJ*) (*mat)->data;
5559 
5560   (*mat)->preallocated = PETSC_TRUE;
5561 
5562   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5563   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5564 
5565   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5566   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5567 
5568   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5569   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5570   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5571   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5572 
5573   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5574   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5575   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5576   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5577   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 /*
5582     Special version for direct calls from Fortran
5583 */
5584 #include <petsc/private/fortranimpl.h>
5585 
5586 /* Change these macros so they can be used in a void function */
5587 #undef CHKERRQ
5588 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5589 #undef SETERRQ2
5590 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5591 #undef SETERRQ3
5592 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5593 #undef SETERRQ
5594 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5595 
5596 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5597 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5598 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5599 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5600 #else
5601 #endif
5602 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5603 {
5604   Mat            mat  = *mmat;
5605   PetscInt       m    = *mm, n = *mn;
5606   InsertMode     addv = *maddv;
5607   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5608   PetscScalar    value;
5609   PetscErrorCode ierr;
5610 
5611   MatCheckPreallocated(mat,1);
5612   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5613 
5614 #if defined(PETSC_USE_DEBUG)
5615   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5616 #endif
5617   {
5618     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5619     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5620     PetscBool roworiented = aij->roworiented;
5621 
5622     /* Some Variables required in the macro */
5623     Mat        A                 = aij->A;
5624     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5625     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5626     MatScalar  *aa               = a->a;
5627     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5628     Mat        B                 = aij->B;
5629     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5630     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5631     MatScalar  *ba               = b->a;
5632 
5633     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5634     PetscInt  nonew = a->nonew;
5635     MatScalar *ap1,*ap2;
5636 
5637     PetscFunctionBegin;
5638     for (i=0; i<m; i++) {
5639       if (im[i] < 0) continue;
5640 #if defined(PETSC_USE_DEBUG)
5641       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5642 #endif
5643       if (im[i] >= rstart && im[i] < rend) {
5644         row      = im[i] - rstart;
5645         lastcol1 = -1;
5646         rp1      = aj + ai[row];
5647         ap1      = aa + ai[row];
5648         rmax1    = aimax[row];
5649         nrow1    = ailen[row];
5650         low1     = 0;
5651         high1    = nrow1;
5652         lastcol2 = -1;
5653         rp2      = bj + bi[row];
5654         ap2      = ba + bi[row];
5655         rmax2    = bimax[row];
5656         nrow2    = bilen[row];
5657         low2     = 0;
5658         high2    = nrow2;
5659 
5660         for (j=0; j<n; j++) {
5661           if (roworiented) value = v[i*n+j];
5662           else value = v[i+j*m];
5663           if (in[j] >= cstart && in[j] < cend) {
5664             col = in[j] - cstart;
5665             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5666             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5667           } else if (in[j] < 0) continue;
5668 #if defined(PETSC_USE_DEBUG)
5669           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5670 #endif
5671           else {
5672             if (mat->was_assembled) {
5673               if (!aij->colmap) {
5674                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5675               }
5676 #if defined(PETSC_USE_CTABLE)
5677               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5678               col--;
5679 #else
5680               col = aij->colmap[in[j]] - 1;
5681 #endif
5682               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5683               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5684                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5685                 col  =  in[j];
5686                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5687                 B     = aij->B;
5688                 b     = (Mat_SeqAIJ*)B->data;
5689                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5690                 rp2   = bj + bi[row];
5691                 ap2   = ba + bi[row];
5692                 rmax2 = bimax[row];
5693                 nrow2 = bilen[row];
5694                 low2  = 0;
5695                 high2 = nrow2;
5696                 bm    = aij->B->rmap->n;
5697                 ba    = b->a;
5698               }
5699             } else col = in[j];
5700             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5701           }
5702         }
5703       } else if (!aij->donotstash) {
5704         if (roworiented) {
5705           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5706         } else {
5707           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5708         }
5709       }
5710     }
5711   }
5712   PetscFunctionReturnVoid();
5713 }
5714 
5715