xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fc8a9adeb7fcdc98711d755fa2dc544ddccf0f3e)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16    for communicators controlling multiple processes.  It is recommended that you call both of
17    the above preallocation routines for simplicity, as illustrated in the example below.
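
   Example Usage:
   A minimal sketch; comm, N, d_nz, and o_nz are placeholders supplied by the caller, and the
   per-row nonzero estimates are illustrative only.
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATAIJ);
   /* call both preallocation routines; only the one matching the communicator takes effect */
   MatSeqAIJSetPreallocation(A,d_nz,NULL);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), ... */
.ve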
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the matrix type also
24     automatically switches over to using inodes when enough of them exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
218   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the number of off-diagonal entries in each row and how many lie left of the diagonal block */
283       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the number of off-diagonal entries in each row and how many lie left of the diagonal block */
316       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404   number to the local column number in the off-diagonal part of the local
405   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
406   a slightly higher hash-table lookup cost; without it, it is not scalable (each process
407   stores an integer array of length N), though access is fast.
408 */
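/*
  For illustration (hypothetical data): if the off-diagonal block B has garray = {3,7,12}, then without
  PETSC_USE_CTABLE the colmap array satisfies colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and is zero
  elsewhere, so a lookup of global column c is "local = colmap[c] - 1", with -1 meaning the column does
  not occur in B.  With PETSC_USE_CTABLE the same 1-based values are stored under the key c+1 and are
  retrieved with PetscTableFind(colmap,c+1,&local) followed by local--.
*/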
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) { \
444             ap1[_i] += value;   \
445             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
446             (void)PetscLogFlops(1.0);   \
447            } \
448           else                    ap1[_i] = value; \
449           goto a_noinsert; \
450         } \
451       }  \
452       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
453       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
454       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
455       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
456       N = nrow1++ - 1; a->nz++; high1++; \
457       /* shift up all the later entries in this row */ \
458       for (ii=N; ii>=_i; ii--) { \
459         rp1[ii+1] = rp1[ii]; \
460         ap1[ii+1] = ap1[ii]; \
461       } \
462       rp1[_i] = col;  \
463       ap1[_i] = value;  \
464       A->nonzerostate++;\
465       a_noinsert: ; \
466       ailen[row] = nrow1; \
467 }
468 
469 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
470   { \
471     if (col <= lastcol2) low2 = 0;                        \
472     else high2 = nrow2;                                   \
473     lastcol2 = col;                                       \
474     while (high2-low2 > 5) {                              \
475       t = (low2+high2)/2;                                 \
476       if (rp2[t] > col) high2 = t;                        \
477       else             low2  = t;                         \
478     }                                                     \
479     for (_i=low2; _i<high2; _i++) {                       \
480       if (rp2[_i] > col) break;                           \
481       if (rp2[_i] == col) {                               \
482         if (addv == ADD_VALUES) {                         \
483           ap2[_i] += value;                               \
484           (void)PetscLogFlops(1.0);                       \
485         }                                                 \
486         else                    ap2[_i] = value;          \
487         goto b_noinsert;                                  \
488       }                                                   \
489     }                                                     \
490     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
491     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
492     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
493     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
494     N = nrow2++ - 1; b->nz++; high2++;                    \
495     /* shift up all the later entries in this row */      \
496     for (ii=N; ii>=_i; ii--) {                            \
497       rp2[ii+1] = rp2[ii];                                \
498       ap2[ii+1] = ap2[ii];                                \
499     }                                                     \
500     rp2[_i] = col;                                        \
501     ap2[_i] = value;                                      \
502     B->nonzerostate++;                                    \
503     b_noinsert: ;                                         \
504     bilen[row] = nrow2;                                   \
505   }
506 
507 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
508 {
509   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
510   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
511   PetscErrorCode ierr;
512   PetscInt       l,*garray = mat->garray,diag;
513 
514   PetscFunctionBegin;
515   /* code only works for square matrices A */
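  /* v[] must hold the entire row in ascending global column order: first the off-diagonal entries
     left of the diagonal block, then the diagonal block, then the off-diagonal entries to its right;
     this is what the three PetscMemcpy() calls below implement */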
516 
517   /* find size of row to the left of the diagonal part */
518   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
519   row  = row - diag;
520   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
521     if (garray[b->j[b->i[row]+l]] > diag) break;
522   }
523   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
524 
525   /* diagonal part */
526   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
527 
528   /* right of diagonal part */
529   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
530   PetscFunctionReturn(0);
531 }
532 
533 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
534 {
535   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
536   PetscScalar    value;
537   PetscErrorCode ierr;
538   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
539   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
540   PetscBool      roworiented = aij->roworiented;
541 
542   /* Some Variables required in the macro */
543   Mat        A                 = aij->A;
544   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
545   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
546   MatScalar  *aa               = a->a;
547   PetscBool  ignorezeroentries = a->ignorezeroentries;
548   Mat        B                 = aij->B;
549   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
550   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
551   MatScalar  *ba               = b->a;
552 
553   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
554   PetscInt  nonew;
555   MatScalar *ap1,*ap2;
556 
557   PetscFunctionBegin;
558   for (i=0; i<m; i++) {
559     if (im[i] < 0) continue;
560 #if defined(PETSC_USE_DEBUG)
561     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
562 #endif
563     if (im[i] >= rstart && im[i] < rend) {
564       row      = im[i] - rstart;
565       lastcol1 = -1;
566       rp1      = aj + ai[row];
567       ap1      = aa + ai[row];
568       rmax1    = aimax[row];
569       nrow1    = ailen[row];
570       low1     = 0;
571       high1    = nrow1;
572       lastcol2 = -1;
573       rp2      = bj + bi[row];
574       ap2      = ba + bi[row];
575       rmax2    = bimax[row];
576       nrow2    = bilen[row];
577       low2     = 0;
578       high2    = nrow2;
579 
580       for (j=0; j<n; j++) {
581         if (roworiented) value = v[i*n+j];
582         else             value = v[i+j*m];
583         if (in[j] >= cstart && in[j] < cend) {
584           col   = in[j] - cstart;
585           nonew = a->nonew;
586           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
587           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
588         } else if (in[j] < 0) continue;
589 #if defined(PETSC_USE_DEBUG)
590         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
591 #endif
592         else {
593           if (mat->was_assembled) {
594             if (!aij->colmap) {
595               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
596             }
597 #if defined(PETSC_USE_CTABLE)
598             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
599             col--;
600 #else
601             col = aij->colmap[in[j]] - 1;
602 #endif
603             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
604               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
605               col  =  in[j];
606               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
607               B     = aij->B;
608               b     = (Mat_SeqAIJ*)B->data;
609               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
610               rp2   = bj + bi[row];
611               ap2   = ba + bi[row];
612               rmax2 = bimax[row];
613               nrow2 = bilen[row];
614               low2  = 0;
615               high2 = nrow2;
616               bm    = aij->B->rmap->n;
617               ba    = b->a;
618             } else if (col < 0) {
619               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
620                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
621               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
622             }
623           } else col = in[j];
624           nonew = b->nonew;
625           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
626         }
627       }
628     } else {
629       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
630       if (!aij->donotstash) {
631         mat->assembled = PETSC_FALSE;
632         if (roworiented) {
633           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
634         } else {
635           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
636         }
637       }
638     }
639   }
640   PetscFunctionReturn(0);
641 }
642 
643 /*
644     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
645     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
646     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
647 */
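/*
  For illustration (hypothetical data): with cstart = 4 and cend = 8 on this process, a row with global
  columns mat_j = {1, 5, 9} contributes the single local column 5 - 4 = 1 to the diagonal part (ailen = 1)
  and the global columns 1 and 9 to the off-diagonal part (bilen = 2); the off-diagonal column indices are
  left global here and are compacted to local indices later during assembly (see MatSetUpMultiply_MPIAIJ()).
*/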
648 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
649 {
650   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
651   Mat            A           = aij->A; /* diagonal part of the matrix */
652   Mat            B           = aij->B; /* offdiagonal part of the matrix */
653   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
654   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
655   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
656   PetscInt       *ailen      = a->ilen,*aj = a->j;
657   PetscInt       *bilen      = b->ilen,*bj = b->j;
658   PetscInt       am          = aij->A->rmap->n,j;
659   PetscInt       diag_so_far = 0,dnz;
660   PetscInt       offd_so_far = 0,onz;
661 
662   PetscFunctionBegin;
663   /* Iterate over all rows of the matrix */
664   for (j=0; j<am; j++) {
665     dnz = onz = 0;
666     /*  Iterate over all non-zero columns of the current row */
667     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
668       /* If column is in the diagonal */
669       if (mat_j[col] >= cstart && mat_j[col] < cend) {
670         aj[diag_so_far++] = mat_j[col] - cstart;
671         dnz++;
672       } else { /* off-diagonal entries */
673         bj[offd_so_far++] = mat_j[col];
674         onz++;
675       }
676     }
677     ailen[j] = dnz;
678     bilen[j] = onz;
679   }
680   PetscFunctionReturn(0);
681 }
682 
683 /*
684     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
685     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
686     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
687     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
688     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
689 */
690 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
691 {
692   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
693   Mat            A      = aij->A; /* diagonal part of the matrix */
694   Mat            B      = aij->B; /* offdiagonal part of the matrix */
695   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
696   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
697   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
698   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
699   PetscInt       *ailen = a->ilen,*aj = a->j;
700   PetscInt       *bilen = b->ilen,*bj = b->j;
701   PetscInt       am     = aij->A->rmap->n,j;
702   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
703   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
704   PetscScalar    *aa = a->a,*ba = b->a;
705 
706   PetscFunctionBegin;
707   /* Iterate over all rows of the matrix */
708   for (j=0; j<am; j++) {
709     dnz_row = onz_row = 0;
710     rowstart_offd = full_offd_i[j];
711     rowstart_diag = full_diag_i[j];
712     /*  Iterate over all non-zero columns of the current row */
713     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
714       /* If column is in the diagonal */
715       if (mat_j[col] >= cstart && mat_j[col] < cend) {
716         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
717         aa[rowstart_diag+dnz_row] = mat_a[col];
718         dnz_row++;
719       } else { /* off-diagonal entries */
720         bj[rowstart_offd+onz_row] = mat_j[col];
721         ba[rowstart_offd+onz_row] = mat_a[col];
722         onz_row++;
723       }
724     }
725     ailen[j] = dnz_row;
726     bilen[j] = onz_row;
727   }
728   PetscFunctionReturn(0);
729 }
730 
731 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
732 {
733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
734   PetscErrorCode ierr;
735   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
736   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
737 
738   PetscFunctionBegin;
739   for (i=0; i<m; i++) {
740     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
741     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
742     if (idxm[i] >= rstart && idxm[i] < rend) {
743       row = idxm[i] - rstart;
744       for (j=0; j<n; j++) {
745         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
746         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
747         if (idxn[j] >= cstart && idxn[j] < cend) {
748           col  = idxn[j] - cstart;
749           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
750         } else {
751           if (!aij->colmap) {
752             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
753           }
754 #if defined(PETSC_USE_CTABLE)
755           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
756           col--;
757 #else
758           col = aij->colmap[idxn[j]] - 1;
759 #endif
760           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
761           else {
762             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
763           }
764         }
765       }
766     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
767   }
768   PetscFunctionReturn(0);
769 }
770 
771 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
772 
773 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
774 {
775   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
776   PetscErrorCode ierr;
777   PetscInt       nstash,reallocs;
778 
779   PetscFunctionBegin;
780   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
781 
782   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
783   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
784   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
789 {
790   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
791   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
792   PetscErrorCode ierr;
793   PetscMPIInt    n;
794   PetscInt       i,j,rstart,ncols,flg;
795   PetscInt       *row,*col;
796   PetscBool      other_disassembled;
797   PetscScalar    *val;
798 
799   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
800 
801   PetscFunctionBegin;
802   if (!aij->donotstash && !mat->nooffprocentries) {
803     while (1) {
804       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
805       if (!flg) break;
806 
807       for (i=0; i<n; ) {
808         /* Now identify the consecutive vals belonging to the same row */
809         for (j=i,rstart=row[j]; j<n; j++) {
810           if (row[j] != rstart) break;
811         }
812         if (j < n) ncols = j-i;
813         else       ncols = n-i;
814         /* Now assemble all these values with a single function call */
815         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
816 
817         i = j;
818       }
819     }
820     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
821   }
822   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
823   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
824 
825   /* determine if any processor has disassembled, if so we must
826      also disassemble ourselves, in order that we may reassemble. */
827   /*
828      if nonzero structure of submatrix B cannot change then we know that
829      no processor disassembled thus we can skip this stuff
830   */
831   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
832     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
833     if (mat->was_assembled && !other_disassembled) {
834       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
835     }
836   }
837   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
838     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
839   }
840   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
841   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
842   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
843 
844   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
845 
846   aij->rowvalues = 0;
847 
848   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
849   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
850 
851   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
852   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
853     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
854     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
855   }
856   PetscFunctionReturn(0);
857 }
858 
859 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
860 {
861   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
862   PetscErrorCode ierr;
863 
864   PetscFunctionBegin;
865   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
866   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
871 {
872   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
873   PetscObjectState sA, sB;
874   PetscInt        *lrows;
875   PetscInt         r, len;
876   PetscBool        cong, lch, gch;
877   PetscErrorCode   ierr;
878 
879   PetscFunctionBegin;
880   /* get locally owned rows */
881   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
882   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
883   /* fix right hand side if needed */
884   if (x && b) {
885     const PetscScalar *xx;
886     PetscScalar       *bb;
887 
888     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
889     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
890     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
891     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
892     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
893     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
894   }
895 
896   sA = mat->A->nonzerostate;
897   sB = mat->B->nonzerostate;
898 
899   if (diag != 0.0 && cong) {
900     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
901     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
902   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
903     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
904     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
905     PetscInt   nnwA, nnwB;
906     PetscBool  nnzA, nnzB;
907 
908     nnwA = aijA->nonew;
909     nnwB = aijB->nonew;
910     nnzA = aijA->keepnonzeropattern;
911     nnzB = aijB->keepnonzeropattern;
912     if (!nnzA) {
913       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
914       aijA->nonew = 0;
915     }
916     if (!nnzB) {
917       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
918       aijB->nonew = 0;
919     }
920     /* Must zero here before the next loop */
921     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
922     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
923     for (r = 0; r < len; ++r) {
924       const PetscInt row = lrows[r] + A->rmap->rstart;
925       if (row >= A->cmap->N) continue;
926       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
927     }
928     aijA->nonew = nnwA;
929     aijB->nonew = nnwB;
930   } else {
931     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
932     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
933   }
934   ierr = PetscFree(lrows);CHKERRQ(ierr);
935   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
936   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
937 
938   /* reduce nonzerostate */
939   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
940   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941   if (gch) A->nonzerostate++;
942   PetscFunctionReturn(0);
943 }
944 
945 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
946 {
947   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
948   PetscErrorCode    ierr;
949   PetscMPIInt       n = A->rmap->n;
950   PetscInt          i,j,r,m,p = 0,len = 0;
951   PetscInt          *lrows,*owners = A->rmap->range;
952   PetscSFNode       *rrows;
953   PetscSF           sf;
954   const PetscScalar *xx;
955   PetscScalar       *bb,*mask;
956   Vec               xmask,lmask;
957   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
958   const PetscInt    *aj, *ii,*ridx;
959   PetscScalar       *aa;
960 
961   PetscFunctionBegin;
962   /* Create SF where leaves are input rows and roots are owned rows */
963   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
964   for (r = 0; r < n; ++r) lrows[r] = -1;
965   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
966   for (r = 0; r < N; ++r) {
967     const PetscInt idx   = rows[r];
968     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
969     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
970       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
971     }
972     rrows[r].rank  = p;
973     rrows[r].index = rows[r] - owners[p];
974   }
975   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
976   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
977   /* Collect flags for rows to be zeroed */
978   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
979   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
980   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
981   /* Compress and put in row numbers */
982   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
983   /* zero diagonal part of matrix */
984   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
985   /* handle off diagonal part of matrix */
986   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
987   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
988   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
989   for (i=0; i<len; i++) bb[lrows[i]] = 1;
990   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
991   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
994   if (x && b) { /* this code is buggy when the row and column layout don't match */
995     PetscBool cong;
996 
997     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
998     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
999     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1000     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1002     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1003   }
1004   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1005   /* remove zeroed rows of off diagonal matrix */
1006   ii = aij->i;
1007   for (i=0; i<len; i++) {
1008     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1009   }
1010   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1011   if (aij->compressedrow.use) {
1012     m    = aij->compressedrow.nrows;
1013     ii   = aij->compressedrow.i;
1014     ridx = aij->compressedrow.rindex;
1015     for (i=0; i<m; i++) {
1016       n  = ii[i+1] - ii[i];
1017       aj = aij->j + ii[i];
1018       aa = aij->a + ii[i];
1019 
1020       for (j=0; j<n; j++) {
1021         if (PetscAbsScalar(mask[*aj])) {
1022           if (b) bb[*ridx] -= *aa*xx[*aj];
1023           *aa = 0.0;
1024         }
1025         aa++;
1026         aj++;
1027       }
1028       ridx++;
1029     }
1030   } else { /* do not use compressed row format */
1031     m = l->B->rmap->n;
1032     for (i=0; i<m; i++) {
1033       n  = ii[i+1] - ii[i];
1034       aj = aij->j + ii[i];
1035       aa = aij->a + ii[i];
1036       for (j=0; j<n; j++) {
1037         if (PetscAbsScalar(mask[*aj])) {
1038           if (b) bb[i] -= *aa*xx[*aj];
1039           *aa = 0.0;
1040         }
1041         aa++;
1042         aj++;
1043       }
1044     }
1045   }
1046   if (x && b) {
1047     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1048     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1049   }
1050   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1051   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1052   ierr = PetscFree(lrows);CHKERRQ(ierr);
1053 
1054   /* only change matrix nonzero state if pattern was allowed to be changed */
1055   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1056     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1057     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1058   }
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1063 {
1064   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1065   PetscErrorCode ierr;
1066   PetscInt       nt;
1067   VecScatter     Mvctx = a->Mvctx;
1068 
1069   PetscFunctionBegin;
1070   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1071   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1072 
1073   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1074   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1081 {
1082   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1083   PetscErrorCode ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1091 {
1092   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   VecScatter     Mvctx = a->Mvctx;
1095 
1096   PetscFunctionBegin;
1097   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1098   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1100   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1101   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   /* do nondiagonal part */
1112   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1113   /* do local part */
1114   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1115   /* add partial results together */
1116   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1122 {
1123   MPI_Comm       comm;
1124   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1125   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1126   IS             Me,Notme;
1127   PetscErrorCode ierr;
1128   PetscInt       M,N,first,last,*notme,i;
1129   PetscBool      lf;
1130   PetscMPIInt    size;
1131 
1132   PetscFunctionBegin;
1133   /* Easy test: symmetric diagonal block */
1134   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1135   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1136   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1137   if (!*f) PetscFunctionReturn(0);
1138   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1139   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1140   if (size == 1) PetscFunctionReturn(0);
1141 
1142   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1143   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1144   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1145   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1146   for (i=0; i<first; i++) notme[i] = i;
1147   for (i=last; i<M; i++) notme[i-last+first] = i;
1148   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1149   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1150   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1151   Aoff = Aoffs[0];
1152   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1153   Boff = Boffs[0];
1154   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1155   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1156   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1157   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1158   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1159   ierr = PetscFree(notme);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1164 {
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1173 {
1174   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   /* do nondiagonal part */
1179   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1180   /* do local part */
1181   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1182   /* add partial results together */
1183   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1184   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
1192 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1193 {
1194   PetscErrorCode ierr;
1195   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1196 
1197   PetscFunctionBegin;
1198   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1199   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1200   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1205 {
1206   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1211   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1216 {
1217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221 #if defined(PETSC_USE_LOG)
1222   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1223 #endif
1224   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1225   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1226   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1227   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1228 #if defined(PETSC_USE_CTABLE)
1229   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1230 #else
1231   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1232 #endif
1233   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1234   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1235   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1236   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1237   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1238   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1239   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1240 
1241   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1250 #if defined(PETSC_HAVE_ELEMENTAL)
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1252 #endif
1253 #if defined(PETSC_HAVE_HYPRE)
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1256 #endif
1257   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1265   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1266   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1267   PetscErrorCode ierr;
1268   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1269   int            fd;
1270   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1271   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1272   PetscScalar    *column_values;
1273   PetscInt       message_count,flowcontrolcount;
1274   FILE           *file;
1275 
1276   PetscFunctionBegin;
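  /* File layout produced by this routine: a 4-entry header [MAT_FILE_CLASSID, global rows,
     global columns, global nonzero count], followed by all row lengths, then all global
     column indices, then all values; each section is gathered to process 0 in rank order
     under flow control before being written. */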
1277   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1278   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1279   nz   = A->nz + B->nz;
1280   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1281   if (!rank) {
1282     header[0] = MAT_FILE_CLASSID;
1283     header[1] = mat->rmap->N;
1284     header[2] = mat->cmap->N;
1285 
1286     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1288     /* get largest number of rows any processor has */
1289     rlen  = mat->rmap->n;
1290     range = mat->rmap->range;
1291     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1292   } else {
1293     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1294     rlen = mat->rmap->n;
1295   }
1296 
1297   /* load up the local row counts */
1298   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1299   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1300 
1301   /* store the row lengths to the file */
1302   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1303   if (!rank) {
1304     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     for (i=1; i<size; i++) {
1306       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1307       rlen = range[i+1] - range[i];
1308       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1309       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1310     }
1311     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1312   } else {
1313     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1314     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1316   }
1317   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1318 
1319   /* load up the local column indices */
1320   nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
1321   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1323   cnt   = 0;
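  /* Pack each local row in increasing global column order: first the off-diagonal entries
     whose global column (via garray) lies before the diagonal block, then the diagonal
     block A shifted by cstart, then the remaining off-diagonal entries. */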
1324   for (i=0; i<mat->rmap->n; i++) {
1325     for (j=B->i[i]; j<B->i[i+1]; j++) {
1326       if ((col = garray[B->j[j]]) > cstart) break;
1327       column_indices[cnt++] = col;
1328     }
1329     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1330     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1331   }
1332   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1333 
1334   /* store the column indices to the file */
1335   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1336   if (!rank) {
1337     MPI_Status status;
1338     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     for (i=1; i<size; i++) {
1340       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1341       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1342       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1343       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1345     }
1346     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1347   } else {
1348     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1349     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1350     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1352   }
1353   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1354 
1355   /* load up the local column values */
1356   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1357   cnt  = 0;
1358   for (i=0; i<mat->rmap->n; i++) {
1359     for (j=B->i[i]; j<B->i[i+1]; j++) {
1360       if (garray[B->j[j]] > cstart) break;
1361       column_values[cnt++] = B->a[j];
1362     }
1363     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1364     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column values to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_values);CHKERRQ(ierr);
1388 
1389   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1390   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #include <petscdraw.h>
1395 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1396 {
1397   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1398   PetscErrorCode    ierr;
1399   PetscMPIInt       rank = aij->rank,size = aij->size;
1400   PetscBool         isdraw,iascii,isbinary;
1401   PetscViewer       sviewer;
1402   PetscViewerFormat format;
1403 
1404   PetscFunctionBegin;
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1408   if (iascii) {
1409     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1410     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1411       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1412       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1413       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1414       for (i=0; i<(PetscInt)size; i++) {
1415         nmax = PetscMax(nmax,nz[i]);
1416         nmin = PetscMin(nmin,nz[i]);
1417         navg += nz[i];
1418       }
1419       ierr = PetscFree(nz);CHKERRQ(ierr);
1420       navg = navg/size;
1421       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1422       PetscFunctionReturn(0);
1423     }
1424     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1425     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1426       MatInfo   info;
1427       PetscBool inodes;
1428 
1429       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1430       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1431       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1433       if (!inodes) {
1434         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1435                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1436       } else {
1437         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1438                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1439       }
1440       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1442       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1444       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1445       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1446       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1447       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1448       PetscFunctionReturn(0);
1449     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1450       PetscInt inodecount,inodelimit,*inodes;
1451       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1452       if (inodes) {
1453         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1454       } else {
1455         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1456       }
1457       PetscFunctionReturn(0);
1458     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1459       PetscFunctionReturn(0);
1460     }
1461   } else if (isbinary) {
1462     if (size == 1) {
1463       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1465     } else {
1466       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1467     }
1468     PetscFunctionReturn(0);
1469   } else if (iascii && size == 1) {
1470     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1471     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1472     PetscFunctionReturn(0);
1473   } else if (isdraw) {
1474     PetscDraw draw;
1475     PetscBool isnull;
1476     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1477     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1478     if (isnull) PetscFunctionReturn(0);
1479   }
1480 
1481   { /* assemble the entire matrix onto first processor */
1482     Mat A = NULL, Av;
1483     IS  isrow,iscol;
1484 
1485     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1486     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1487     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1488     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1489 /*  The commented code uses MatCreateSubMatrices instead */
1490 /*
1491     Mat *AA, A = NULL, Av;
1492     IS  isrow,iscol;
1493 
1494     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1495     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1496     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1497     if (!rank) {
1498        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1499        A    = AA[0];
1500        Av   = AA[0];
1501     }
1502     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1503 */
1504     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1505     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1506     /*
1507        Every process has to participate in this call since the graphics waits are
1508        synchronized across all processes that share the PetscDraw object
1509     */
1510     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1511     if (!rank) {
1512       if (((PetscObject)mat)->name) {
1513         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       }
1515       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1516     }
1517     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1518     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(&A);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1525 {
1526   PetscErrorCode ierr;
1527   PetscBool      iascii,isdraw,issocket,isbinary;
1528 
1529   PetscFunctionBegin;
1530   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1531   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1532   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1533   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1534   if (iascii || isdraw || isbinary || issocket) {
1535     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1541 {
1542   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1543   PetscErrorCode ierr;
1544   Vec            bb1 = 0;
1545   PetscBool      hasop;
1546 
1547   PetscFunctionBegin;
1548   if (flag == SOR_APPLY_UPPER) {
1549     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550     PetscFunctionReturn(0);
1551   }
1552 
1553   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1554     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1555   }
1556 
1557   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1558     if (flag & SOR_ZERO_INITIAL_GUESS) {
1559       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1560       its--;
1561     }
1562 
1563     while (its--) {
1564       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566 
1567       /* update rhs: bb1 = bb - B*x */
1568       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1569       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1570 
1571       /* local sweep */
1572       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1573     }
1574   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1575     if (flag & SOR_ZERO_INITIAL_GUESS) {
1576       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1577       its--;
1578     }
1579     while (its--) {
1580       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1581       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582 
1583       /* update rhs: bb1 = bb - B*x */
1584       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1585       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1586 
1587       /* local sweep */
1588       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1589     }
1590   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1591     if (flag & SOR_ZERO_INITIAL_GUESS) {
1592       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1593       its--;
1594     }
1595     while (its--) {
1596       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598 
1599       /* update rhs: bb1 = bb - B*x */
1600       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1601       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1602 
1603       /* local sweep */
1604       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1605     }
1606   } else if (flag & SOR_EISENSTAT) {
1607     Vec xx1;
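    /* Eisenstat's trick: a local backward sweep with zero initial guess produces xx, the
       right-hand side bb1 is then built from the (block) diagonal of the matrix and the
       off-process contribution B*lvec, and a local forward sweep yields the correction xx1
       that is added to xx. */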
1608 
1609     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1610     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1611 
1612     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614     if (!mat->diag) {
1615       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1616       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1617     }
1618     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1619     if (hasop) {
1620       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1621     } else {
1622       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1623     }
1624     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1625 
1626     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1627 
1628     /* local sweep */
1629     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1630     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1631     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1632   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1633 
1634   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1635 
1636   matin->factorerrortype = mat->A->factorerrortype;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1641 {
1642   Mat            aA,aB,Aperm;
1643   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1644   PetscScalar    *aa,*ba;
1645   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1646   PetscSF        rowsf,sf;
1647   IS             parcolp = NULL;
1648   PetscBool      done;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
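  /* Overview: star forests (PetscSF) are used to invert the row and column permutations so
     that every process learns the destination global index of each of its rows, columns,
     and ghost columns (rdest, cdest, gcdest); per-row preallocation counts for the permuted
     matrix are derived from these, and the entries are finally inserted into Aperm with
     MatSetValues(). */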
1652   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1653   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1654   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1655   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1656 
1657   /* Invert row permutation to find out where my rows should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1661   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1662   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1664 
1665   /* Invert column permutation to find out where my columns should go */
1666   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1667   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1668   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1669   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1670   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1671   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1672   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1673 
1674   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1675   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1676   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1677 
1678   /* Find out where my gcols should go */
1679   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1680   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1682   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1683   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1684   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1685   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1686   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1687 
1688   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1689   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1690   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1691   for (i=0; i<m; i++) {
1692     PetscInt row = rdest[i],rowner;
1693     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1694     for (j=ai[i]; j<ai[i+1]; j++) {
1695       PetscInt cowner,col = cdest[aj[j]];
1696       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1697       if (rowner == cowner) dnnz[i]++;
1698       else onnz[i]++;
1699     }
1700     for (j=bi[i]; j<bi[i+1]; j++) {
1701       PetscInt cowner,col = gcdest[bj[j]];
1702       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1703       if (rowner == cowner) dnnz[i]++;
1704       else onnz[i]++;
1705     }
1706   }
1707   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1708   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1709   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1710   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1711   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1712 
1713   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1714   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1715   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1716   for (i=0; i<m; i++) {
1717     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1718     PetscInt j0,rowlen;
1719     rowlen = ai[i+1] - ai[i];
1720     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1721       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1722       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1723     }
1724     rowlen = bi[i+1] - bi[i];
1725     for (j0=j=0; j<rowlen; j0=j) {
1726       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1727       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1728     }
1729   }
1730   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1731   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1733   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1734   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1735   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1736   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1737   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1738   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1739   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1740   *B = Aperm;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1745 {
1746   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1751   if (ghosts) *ghosts = aij->garray;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1756 {
1757   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1758   Mat            A    = mat->A,B = mat->B;
1759   PetscErrorCode ierr;
1760   PetscReal      isend[5],irecv[5];
1761 
1762   PetscFunctionBegin;
1763   info->block_size = 1.0;
1764   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1765 
1766   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1767   isend[3] = info->memory;  isend[4] = info->mallocs;
1768 
1769   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1770 
1771   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1772   isend[3] += info->memory;  isend[4] += info->mallocs;
1773   if (flag == MAT_LOCAL) {
1774     info->nz_used      = isend[0];
1775     info->nz_allocated = isend[1];
1776     info->nz_unneeded  = isend[2];
1777     info->memory       = isend[3];
1778     info->mallocs      = isend[4];
1779   } else if (flag == MAT_GLOBAL_MAX) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   } else if (flag == MAT_GLOBAL_SUM) {
1788     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1789 
1790     info->nz_used      = irecv[0];
1791     info->nz_allocated = irecv[1];
1792     info->nz_unneeded  = irecv[2];
1793     info->memory       = irecv[3];
1794     info->mallocs      = irecv[4];
1795   }
1796   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1797   info->fill_ratio_needed = 0;
1798   info->factor_mallocs    = 0;
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1803 {
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   switch (op) {
1809   case MAT_NEW_NONZERO_LOCATIONS:
1810   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1811   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1812   case MAT_KEEP_NONZERO_PATTERN:
1813   case MAT_NEW_NONZERO_LOCATION_ERR:
1814   case MAT_USE_INODES:
1815   case MAT_IGNORE_ZERO_ENTRIES:
1816     MatCheckPreallocated(A,1);
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_ROW_ORIENTED:
1821     MatCheckPreallocated(A,1);
1822     a->roworiented = flg;
1823 
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_NEW_DIAGONALS:
1828     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1829     break;
1830   case MAT_IGNORE_OFF_PROC_ENTRIES:
1831     a->donotstash = flg;
1832     break;
1833   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1834   case MAT_SPD:
1835   case MAT_SYMMETRIC:
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837   case MAT_HERMITIAN:
1838   case MAT_SYMMETRY_ETERNAL:
1839     break;
1840   case MAT_SUBMAT_SINGLEIS:
1841     A->submat_singleis = flg;
1842     break;
1843   case MAT_STRUCTURE_ONLY:
1844     /* The option is handled directly by MatSetOption() */
1845     break;
1846   default:
1847     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) max = tmp;
1874     }
1875     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Merge into increasing global column order, assuming A and B are each already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz  = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1941 {
1942   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1943   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1944   PetscErrorCode ierr;
1945   PetscInt       i,j,cstart = mat->cmap->rstart;
1946   PetscReal      sum = 0.0;
1947   MatScalar      *v;
1948 
1949   PetscFunctionBegin;
1950   if (aij->size == 1) {
1951     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1952   } else {
1953     if (type == NORM_FROBENIUS) {
1954       v = amat->a;
1955       for (i=0; i<amat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1961       }
1962       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1963       *norm = PetscSqrtReal(*norm);
1964       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1965     } else if (type == NORM_1) { /* max column norm */
1966       PetscReal *tmp,*tmp2;
1967       PetscInt  *jj,*garray = aij->garray;
1968       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1969       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1970       *norm = 0.0;
1971       v     = amat->a; jj = amat->j;
1972       for (j=0; j<amat->nz; j++) {
1973         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1974       }
1975       v = bmat->a; jj = bmat->j;
1976       for (j=0; j<bmat->nz; j++) {
1977         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1978       }
1979       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1980       for (j=0; j<mat->cmap->N; j++) {
1981         if (tmp2[j] > *norm) *norm = tmp2[j];
1982       }
1983       ierr = PetscFree(tmp);CHKERRQ(ierr);
1984       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1985       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1986     } else if (type == NORM_INFINITY) { /* max row norm */
1987       PetscReal ntemp = 0.0;
1988       for (j=0; j<aij->A->rmap->n; j++) {
1989         v   = amat->a + amat->i[j];
1990         sum = 0.0;
1991         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         v = bmat->a + bmat->i[j];
1995         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         if (sum > ntemp) ntemp = sum;
1999       }
2000       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2001       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2002     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2010   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2011   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2012   PetscErrorCode ierr;
2013   Mat            B,A_diag,*B_diag;
2014   MatScalar      *array;
2015 
2016   PetscFunctionBegin;
2017   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2018   ai = Aloc->i; aj = Aloc->j;
2019   bi = Bloc->i; bj = Bloc->j;
2020   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2021     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2022     PetscSFNode          *oloc;
2023     PETSC_UNUSED PetscSF sf;
2024 
2025     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2026     /* compute d_nnz for preallocation */
2027     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2028     for (i=0; i<ai[ma]; i++) {
2029       d_nnz[aj[i]]++;
2030     }
2031     /* compute local off-diagonal contributions */
2032     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2033     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2034     /* map those to global */
2035     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2036     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2037     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2038     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2039     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2040     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2041     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2042 
2043     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2044     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2045     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2046     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2047     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2048     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2049   } else {
2050     B    = *matout;
2051     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2052   }
2053 
2054   b           = (Mat_MPIAIJ*)B->data;
2055   A_diag      = a->A;
2056   B_diag      = &b->A;
2057   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2058   A_diag_ncol = A_diag->cmap->N;
2059   B_diag_ilen = sub_B_diag->ilen;
2060   B_diag_i    = sub_B_diag->i;
2061 
2062   /* Set ilen for diagonal of B */
2063   for (i=0; i<A_diag_ncol; i++) {
2064     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2065   }
2066 
2067   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2068      quickly (without using MatSetValues()) because all writes are local. */
2069   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2070 
2071   /* copy over the B part */
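  /* Each local row of the off-diagonal block contributes one global column of the
     transpose: its global column indices (recovered through garray) become the row indices
     passed to MatSetValues(), while the original global row number becomes the column. */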
2072   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2073   array = Bloc->a;
2074   row   = A->rmap->rstart;
2075   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2076   cols_tmp = cols;
2077   for (i=0; i<mb; i++) {
2078     ncol = bi[i+1]-bi[i];
2079     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2080     row++;
2081     array += ncol; cols_tmp += ncol;
2082   }
2083   ierr = PetscFree(cols);CHKERRQ(ierr);
2084 
2085   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2086   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2088     *matout = B;
2089   } else {
2090     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2096 {
2097   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2098   Mat            a    = aij->A,b = aij->B;
2099   PetscErrorCode ierr;
2100   PetscInt       s1,s2,s3;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2104   if (rr) {
2105     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2106     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2107     /* Overlap communication with computation. */
2108     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2109   }
2110   if (ll) {
2111     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2112     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2113     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2114   }
2115   /* scale the diagonal block */
2116   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2117 
2118   if (rr) {
2119     /* Do a scatter end and then right scale the off-diagonal block */
2120     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2121     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2127 {
2128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2137 {
2138   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2139   Mat            a,b,c,d;
2140   PetscBool      flg;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   a = matA->A; b = matA->B;
2145   c = matB->A; d = matB->B;
2146 
2147   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2148   if (flg) {
2149     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2150   }
2151   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2156 {
2157   PetscErrorCode ierr;
2158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2159   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2160 
2161   PetscFunctionBegin;
2162   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2163   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2164     /* because of the column compression in the off-processor part of the matrix a->B,
2165        the number of columns in a->B and b->B may be different, hence we cannot call
2166        the MatCopy() directly on the two parts. If need be, we can provide a more
2167        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2168        then copying the submatrices */
2169     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2170   } else {
2171     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2172     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2173   }
2174   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 /*
2188    Computes the number of nonzeros per row needed for preallocation when X and Y
2189    have different nonzero structure.
2190 */
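/*
   For illustration (hypothetical data): if row i of X has global columns {0,3,5} and row i
   of Y has global columns {3,4}, the merged pattern is {0,3,4,5}, so nnz[i] = 4; the
   duplicate column 3 is counted only once.
*/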
2191 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2192 {
2193   PetscInt       i,j,k,nzx,nzy;
2194 
2195   PetscFunctionBegin;
2196   /* Set the number of nonzeros in the new matrix */
2197   for (i=0; i<m; i++) {
2198     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2199     nzx = xi[i+1] - xi[i];
2200     nzy = yi[i+1] - yi[i];
2201     nnz[i] = 0;
2202     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2203       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2204       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2205       nnz[i]++;
2206     }
2207     for (; k<nzy; k++) nnz[i]++;
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2214 {
2215   PetscErrorCode ierr;
2216   PetscInt       m = Y->rmap->N;
2217   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2218   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2219 
2220   PetscFunctionBegin;
2221   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2226 {
2227   PetscErrorCode ierr;
2228   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2229   PetscBLASInt   bnz,one=1;
2230   Mat_SeqAIJ     *x,*y;
2231 
2232   PetscFunctionBegin;
2233   if (str == SAME_NONZERO_PATTERN) {
2234     PetscScalar alpha = a;
2235     x    = (Mat_SeqAIJ*)xx->A->data;
2236     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2237     y    = (Mat_SeqAIJ*)yy->A->data;
2238     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2239     x    = (Mat_SeqAIJ*)xx->B->data;
2240     y    = (Mat_SeqAIJ*)yy->B->data;
2241     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2242     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2243     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2244   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2245     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2246   } else {
2247     Mat      B;
2248     PetscInt *nnz_d,*nnz_o;
2249     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2250     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2251     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2252     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2253     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2254     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2255     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2256     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2257     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2258     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2259     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2260     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2261     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2262     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2263   }
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2268 
2269 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2270 {
2271 #if defined(PETSC_USE_COMPLEX)
2272   PetscErrorCode ierr;
2273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2277   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2278 #else
2279   PetscFunctionBegin;
2280 #endif
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2296 {
2297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2302   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310   PetscInt       i,*idxb = 0;
2311   PetscScalar    *va,*vb;
2312   Vec            vtmp;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2316   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2317   if (idx) {
2318     for (i=0; i<A->rmap->n; i++) {
2319       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2320     }
2321   }
2322 
2323   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2324   if (idx) {
2325     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2326   }
2327   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2328   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2329 
2330   for (i=0; i<A->rmap->n; i++) {
2331     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2332       va[i] = vb[i];
2333       if (idx) idx[i] = a->garray[idxb[i]];
2334     }
2335   }
2336 
2337   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2339   ierr = PetscFree(idxb);CHKERRQ(ierr);
2340   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2345 {
2346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2347   PetscErrorCode ierr;
2348   PetscInt       i,*idxb = 0;
2349   PetscScalar    *va,*vb;
2350   Vec            vtmp;
2351 
2352   PetscFunctionBegin;
2353   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2354   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2355   if (idx) {
2356     for (i=0; i<A->rmap->n; i++) {
2357       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2358     }
2359   }
2360 
2361   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2362   if (idx) {
2363     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2364   }
2365   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2366   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2367 
2368   for (i=0; i<A->rmap->n; i++) {
2369     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2370       va[i] = vb[i];
2371       if (idx) idx[i] = a->garray[idxb[i]];
2372     }
2373   }
2374 
2375   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2377   ierr = PetscFree(idxb);CHKERRQ(ierr);
2378   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2383 {
2384   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2385   PetscInt       n      = A->rmap->n;
2386   PetscInt       cstart = A->cmap->rstart;
2387   PetscInt       *cmap  = mat->garray;
2388   PetscInt       *diagIdx, *offdiagIdx;
2389   Vec            diagV, offdiagV;
2390   PetscScalar    *a, *diagA, *offdiagA;
2391   PetscInt       r;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2397   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2398   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2399   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2401   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2402   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2403   for (r = 0; r < n; ++r) {
2404     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2405       a[r]   = diagA[r];
2406       idx[r] = cstart + diagIdx[r];
2407     } else {
2408       a[r]   = offdiagA[r];
2409       idx[r] = cmap[offdiagIdx[r]];
2410     }
2411   }
2412   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2413   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2414   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2415   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2416   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2417   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2424   PetscInt       n      = A->rmap->n;
2425   PetscInt       cstart = A->cmap->rstart;
2426   PetscInt       *cmap  = mat->garray;
2427   PetscInt       *diagIdx, *offdiagIdx;
2428   Vec            diagV, offdiagV;
2429   PetscScalar    *a, *diagA, *offdiagA;
2430   PetscInt       r;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2461 {
2462   PetscErrorCode ierr;
2463   Mat            *dummy;
2464 
2465   PetscFunctionBegin;
2466   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2467   *newmat = *dummy;
2468   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2473 {
2474   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2479   A->factorerrortype = a->A->factorerrortype;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2484 {
2485   PetscErrorCode ierr;
2486   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2487 
2488   PetscFunctionBegin;
2489   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2490   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2491   if (x->assembled) {
2492     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2493   } else {
2494     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2495   }
2496   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2502 {
2503   PetscFunctionBegin;
2504   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2505   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 /*@
2510    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2511 
2512    Collective on Mat
2513 
2514    Input Parameters:
2515 +    A - the matrix
2516 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2517 
2518  Level: advanced
2519 
2520 @*/
2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2522 {
2523   PetscErrorCode       ierr;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
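/*
   A minimal usage sketch (the names nis, is, and ov are placeholders, not part of this file):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);

   The same switch can be selected at run time with -mat_increase_overlap_scalable, which is
   processed by MatSetFromOptions_MPIAIJ() below.
*/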
2529 
2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2531 {
2532   PetscErrorCode       ierr;
2533   PetscBool            sc = PETSC_FALSE,flg;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2537   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2538   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2539   if (flg) {
2540     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2541   }
2542   ierr = PetscOptionsTail();CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
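/*
  Illustrative note (not part of this source): the option registered above can also
  be set from the command line of any program that calls MatSetFromOptions() on a
  MATMPIAIJ matrix; "myapp" below is a placeholder for the user's executable.

    ./myapp -mat_increase_overlap_scalable 1
*/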
2545 
2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2547 {
2548   PetscErrorCode ierr;
2549   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2550   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2551 
2552   PetscFunctionBegin;
2553   if (!Y->preallocated) {
2554     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2555   } else if (!aij->nz) {
2556     PetscInt nonew = aij->nonew;
2557     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2558     aij->nonew = nonew;
2559   }
2560   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
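/*
  Illustrative sketch (not part of this source): MatShift(Y,a) computes
  Y <- Y + a*I; MatShift_MPIAIJ() above also handles a Y that has not yet been
  preallocated. The matrix Y is an assumption of the example.

    ierr = MatShift(Y,2.0);CHKERRQ(ierr);
*/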
2563 
2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2565 {
2566   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2571   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2572   if (d) {
2573     PetscInt rstart;
2574     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2575     *d += rstart;
2576 
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2582 {
2583   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2584   PetscErrorCode ierr;
2585 
2586   PetscFunctionBegin;
2587   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 /* -------------------------------------------------------------------*/
2592 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2593                                        MatGetRow_MPIAIJ,
2594                                        MatRestoreRow_MPIAIJ,
2595                                        MatMult_MPIAIJ,
2596                                 /* 4*/ MatMultAdd_MPIAIJ,
2597                                        MatMultTranspose_MPIAIJ,
2598                                        MatMultTransposeAdd_MPIAIJ,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*10*/ 0,
2603                                        0,
2604                                        0,
2605                                        MatSOR_MPIAIJ,
2606                                        MatTranspose_MPIAIJ,
2607                                 /*15*/ MatGetInfo_MPIAIJ,
2608                                        MatEqual_MPIAIJ,
2609                                        MatGetDiagonal_MPIAIJ,
2610                                        MatDiagonalScale_MPIAIJ,
2611                                        MatNorm_MPIAIJ,
2612                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2613                                        MatAssemblyEnd_MPIAIJ,
2614                                        MatSetOption_MPIAIJ,
2615                                        MatZeroEntries_MPIAIJ,
2616                                 /*24*/ MatZeroRows_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*29*/ MatSetUp_MPIAIJ,
2622                                        0,
2623                                        0,
2624                                        MatGetDiagonalBlock_MPIAIJ,
2625                                        0,
2626                                 /*34*/ MatDuplicate_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                 /*39*/ MatAXPY_MPIAIJ,
2632                                        MatCreateSubMatrices_MPIAIJ,
2633                                        MatIncreaseOverlap_MPIAIJ,
2634                                        MatGetValues_MPIAIJ,
2635                                        MatCopy_MPIAIJ,
2636                                 /*44*/ MatGetRowMax_MPIAIJ,
2637                                        MatScale_MPIAIJ,
2638                                        MatShift_MPIAIJ,
2639                                        MatDiagonalSet_MPIAIJ,
2640                                        MatZeroRowsColumns_MPIAIJ,
2641                                 /*49*/ MatSetRandom_MPIAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2647                                        0,
2648                                        MatSetUnfactored_MPIAIJ,
2649                                        MatPermute_MPIAIJ,
2650                                        0,
2651                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2652                                        MatDestroy_MPIAIJ,
2653                                        MatView_MPIAIJ,
2654                                        0,
2655                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2657                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2662                                        MatGetRowMinAbs_MPIAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*75*/ MatFDColoringApply_AIJ,
2668                                        MatSetFromOptions_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        MatFindZeroDiagonals_MPIAIJ,
2672                                 /*80*/ 0,
2673                                        0,
2674                                        0,
2675                                 /*83*/ MatLoad_MPIAIJ,
2676                                        MatIsSymmetric_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2682                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2683                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2684                                        MatPtAP_MPIAIJ_MPIAIJ,
2685                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2686                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                 /*99*/ 0,
2692                                        0,
2693                                        0,
2694                                        MatConjugate_MPIAIJ,
2695                                        0,
2696                                 /*104*/MatSetValuesRow_MPIAIJ,
2697                                        MatRealPart_MPIAIJ,
2698                                        MatImaginaryPart_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*109*/0,
2702                                        0,
2703                                        MatGetRowMin_MPIAIJ,
2704                                        0,
2705                                        MatMissingDiagonal_MPIAIJ,
2706                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2707                                        0,
2708                                        MatGetGhosts_MPIAIJ,
2709                                        0,
2710                                        0,
2711                                 /*119*/0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        MatGetMultiProcBlock_MPIAIJ,
2716                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2717                                        MatGetColumnNorms_MPIAIJ,
2718                                        MatInvertBlockDiagonal_MPIAIJ,
2719                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2720                                        MatCreateSubMatricesMPI_MPIAIJ,
2721                                 /*129*/0,
2722                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2723                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2724                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                 /*134*/0,
2727                                        0,
2728                                        MatRARt_MPIAIJ_MPIAIJ,
2729                                        0,
2730                                        0,
2731                                 /*139*/MatSetBlockSizes_MPIAIJ,
2732                                        0,
2733                                        0,
2734                                        MatFDColoringSetUp_MPIXAIJ,
2735                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2736                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2737 };
2738 
2739 /* ----------------------------------------------------------------------------------------*/
2740 
2741 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2753 {
2754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2759   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
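/*
  Illustrative sketch (not part of this source) of the store/retrieve pattern the
  two routines above support: save the numerical values of an assembled matrix once,
  then restore them before each re-assembly with the same nonzero pattern. The
  matrix A is an assumption; MatStoreValues() requires that new nonzero locations
  be disallowed first.

    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
    ierr = MatStoreValues(A);CHKERRQ(ierr);
    ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/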
2762 
2763 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2764 {
2765   Mat_MPIAIJ     *b;
2766   PetscErrorCode ierr;
2767   PetscMPIInt    size;
2768 
2769   PetscFunctionBegin;
2770   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2772   b = (Mat_MPIAIJ*)B->data;
2773 
2774 #if defined(PETSC_USE_CTABLE)
2775   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2776 #else
2777   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2778 #endif
2779   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2780   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2781   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2782 
2783   /* Because B may have been resized we simply destroy it and create a new one each time */
2784   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2785   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2786   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2787   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2788   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2789   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2790   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2791 
2792   if (!B->preallocated) {
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2795     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2796     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2797     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2798   }
2799 
2800   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2801   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2802   B->preallocated  = PETSC_TRUE;
2803   B->was_assembled = PETSC_FALSE;
2804   B->assembled     = PETSC_FALSE;
2805   PetscFunctionReturn(0);
2806 }
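/*
  Illustrative sketch (not part of this source): a typical creation sequence that
  ends in the preallocation routine implemented above. The global size n and the
  per-row estimates (5 diagonal and 2 off-diagonal nonzeros) are assumptions of the
  example.

    Mat A;
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/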
2807 
2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2809 {
2810   Mat_MPIAIJ     *b;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817   b = (Mat_MPIAIJ*)B->data;
2818 
2819 #if defined(PETSC_USE_CTABLE)
2820   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2821 #else
2822   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2823 #endif
2824   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2825   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2826   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2827 
2828   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2829   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2830   B->preallocated  = PETSC_TRUE;
2831   B->was_assembled = PETSC_FALSE;
2832   B->assembled = PETSC_FALSE;
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2837 {
2838   Mat            mat;
2839   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   *newmat = 0;
2844   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2845   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2846   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2847   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2848   a       = (Mat_MPIAIJ*)mat->data;
2849 
2850   mat->factortype   = matin->factortype;
2851   mat->assembled    = PETSC_TRUE;
2852   mat->insertmode   = NOT_SET_VALUES;
2853   mat->preallocated = PETSC_TRUE;
2854 
2855   a->size         = oldmat->size;
2856   a->rank         = oldmat->rank;
2857   a->donotstash   = oldmat->donotstash;
2858   a->roworiented  = oldmat->roworiented;
2859   a->rowindices   = 0;
2860   a->rowvalues    = 0;
2861   a->getrowactive = PETSC_FALSE;
2862 
2863   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2864   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2865 
2866   if (oldmat->colmap) {
2867 #if defined(PETSC_USE_CTABLE)
2868     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2869 #else
2870     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2871     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2872     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2873 #endif
2874   } else a->colmap = 0;
2875   if (oldmat->garray) {
2876     PetscInt len;
2877     len  = oldmat->B->cmap->n;
2878     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2879     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2880     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2881   } else a->garray = 0;
2882 
2883   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2885   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2886   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2887 
2888   if (oldmat->Mvctx_mpi1) {
2889     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2890     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2891   }
2892 
2893   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2894   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2895   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2896   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2897   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2898   *newmat = mat;
2899   PetscFunctionReturn(0);
2900 }
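/*
  Illustrative sketch (not part of this source): duplicating a MATMPIAIJ matrix
  through the public interface; MAT_COPY_VALUES also copies the numerical values,
  MAT_DO_NOT_COPY_VALUES keeps only the nonzero structure. A and B are assumptions
  of the example.

    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/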
2901 
2902 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2903 {
2904   PetscBool      isbinary, ishdf5;
2905   PetscErrorCode ierr;
2906 
2907   PetscFunctionBegin;
2908   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2909   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2910   /* force binary viewer to load .info file if it has not yet done so */
2911   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2912   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2913   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2914   if (isbinary) {
2915     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2916   } else if (ishdf5) {
2917 #if defined(PETSC_HAVE_HDF5)
2918     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2919 #else
2920     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2921 #endif
2922   } else {
2923     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2924   }
2925   PetscFunctionReturn(0);
2926 }
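/*
  Illustrative sketch (not part of this source): loading a MATMPIAIJ matrix from a
  PETSc binary file through MatLoad(), which dispatches to MatLoad_MPIAIJ() above.
  The file name "matrix.dat" is an assumption of the example.

    Mat         A;
    PetscViewer viewer;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatLoad(A,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/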
2927 
2928 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2929 {
2930   PetscScalar    *vals,*svals;
2931   MPI_Comm       comm;
2932   PetscErrorCode ierr;
2933   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2934   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2935   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2936   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2937   PetscInt       cend,cstart,n,*rowners;
2938   int            fd;
2939   PetscInt       bs = newMat->rmap->bs;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2945   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2946   if (!rank) {
2947     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2948     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2949     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2950   }
2951 
2952   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2953   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2954   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2955   if (bs < 0) bs = 1;
2956 
2957   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2958   M    = header[1]; N = header[2];
2959 
2960   /* If global sizes are set, check if they are consistent with that given in the file */
2961   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2962   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2963 
2964   /* determine ownership of all (block) rows */
2965   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2966   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2967   else m = newMat->rmap->n; /* Set by user */
2968 
2969   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2970   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2971 
2972   /* First process needs enough room for process with most rows */
2973   if (!rank) {
2974     mmax = rowners[1];
2975     for (i=2; i<=size; i++) {
2976       mmax = PetscMax(mmax, rowners[i]);
2977     }
2978   } else mmax = -1;             /* unused, but compilers complain */
2979 
2980   rowners[0] = 0;
2981   for (i=2; i<=size; i++) {
2982     rowners[i] += rowners[i-1];
2983   }
2984   rstart = rowners[rank];
2985   rend   = rowners[rank+1];
2986 
2987   /* distribute row lengths to all processors */
2988   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2989   if (!rank) {
2990     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
2991     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2992     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2993     for (j=0; j<m; j++) {
2994       procsnz[0] += ourlens[j];
2995     }
2996     for (i=1; i<size; i++) {
2997       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
2998       /* calculate the number of nonzeros on each processor */
2999       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3000         procsnz[i] += rowlengths[j];
3001       }
3002       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3003     }
3004     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3005   } else {
3006     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3007   }
3008 
3009   if (!rank) {
3010     /* determine max buffer needed and allocate it */
3011     maxnz = 0;
3012     for (i=0; i<size; i++) {
3013       maxnz = PetscMax(maxnz,procsnz[i]);
3014     }
3015     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3016 
3017     /* read in my part of the matrix column indices  */
3018     nz   = procsnz[0];
3019     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3020     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3021 
3022     /* read in everyone else's part and ship it off */
3023     for (i=1; i<size; i++) {
3024       nz   = procsnz[i];
3025       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3026       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3027     }
3028     ierr = PetscFree(cols);CHKERRQ(ierr);
3029   } else {
3030     /* determine buffer space needed for message */
3031     nz = 0;
3032     for (i=0; i<m; i++) {
3033       nz += ourlens[i];
3034     }
3035     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3036 
3037     /* receive message of column indices */
3038     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3039   }
3040 
3041   /* determine column ownership if matrix is not square */
3042   if (N != M) {
3043     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3044     else n = newMat->cmap->n;
3045     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3046     cstart = cend - n;
3047   } else {
3048     cstart = rstart;
3049     cend   = rend;
3050     n      = cend - cstart;
3051   }
3052 
3053   /* loop over local rows, determining number of off-diagonal entries */
3054   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3055   jj   = 0;
3056   for (i=0; i<m; i++) {
3057     for (j=0; j<ourlens[i]; j++) {
3058       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3059       jj++;
3060     }
3061   }
3062 
3063   for (i=0; i<m; i++) {
3064     ourlens[i] -= offlens[i];
3065   }
3066   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3067 
3068   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3069 
3070   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3071 
3072   for (i=0; i<m; i++) {
3073     ourlens[i] += offlens[i];
3074   }
3075 
3076   if (!rank) {
3077     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3078 
3079     /* read in my part of the matrix numerical values  */
3080     nz   = procsnz[0];
3081     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3082 
3083     /* insert into matrix */
3084     jj      = rstart;
3085     smycols = mycols;
3086     svals   = vals;
3087     for (i=0; i<m; i++) {
3088       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3089       smycols += ourlens[i];
3090       svals   += ourlens[i];
3091       jj++;
3092     }
3093 
3094     /* read in other processors and ship out */
3095     for (i=1; i<size; i++) {
3096       nz   = procsnz[i];
3097       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3098       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3099     }
3100     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3101   } else {
3102     /* receive numeric values */
3103     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3104 
3105     /* receive message of values*/
3106     /* receive message of values */
3107 
3108     /* insert into matrix */
3109     jj      = rstart;
3110     smycols = mycols;
3111     svals   = vals;
3112     for (i=0; i<m; i++) {
3113       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3114       smycols += ourlens[i];
3115       svals   += ourlens[i];
3116       jj++;
3117     }
3118   }
3119   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3120   ierr = PetscFree(vals);CHKERRQ(ierr);
3121   ierr = PetscFree(mycols);CHKERRQ(ierr);
3122   ierr = PetscFree(rowners);CHKERRQ(ierr);
3123   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 /* Not scalable because of ISAllGather() unless getting all columns. */
3129 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3130 {
3131   PetscErrorCode ierr;
3132   IS             iscol_local;
3133   PetscBool      isstride;
3134   PetscMPIInt    lisstride=0,gisstride;
3135 
3136   PetscFunctionBegin;
3137   /* check if we are grabbing all columns*/
3138   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3139 
3140   if (isstride) {
3141     PetscInt  start,len,mstart,mlen;
3142     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3143     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3144     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3145     if (mstart == start && mlen-mstart == len) lisstride = 1;
3146   }
3147 
3148   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3149   if (gisstride) {
3150     PetscInt N;
3151     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3152     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3153     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3154     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3155   } else {
3156     PetscInt cbs;
3157     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3158     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3159     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3160   }
3161 
3162   *isseq = iscol_local;
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 /*
3167  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3168  (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3169 
3170  Input Parameters:
3171    mat - matrix
3172    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3173            i.e., mat->rstart <= isrow[i] < mat->rend
3174    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3175            i.e., mat->cstart <= iscol[i] < mat->cend
3176  Output Parameter:
3177    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3178    iscol_o - sequential column index set for retrieving mat->B
3179    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3180  */
3181 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3182 {
3183   PetscErrorCode ierr;
3184   Vec            x,cmap;
3185   const PetscInt *is_idx;
3186   PetscScalar    *xarray,*cmaparray;
3187   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3189   Mat            B=a->B;
3190   Vec            lvec=a->lvec,lcmap;
3191   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3192   MPI_Comm       comm;
3193   VecScatter     Mvctx=a->Mvctx;
3194 
3195   PetscFunctionBegin;
3196   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3198 
3199   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3200   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3201   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3202   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3203   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3204 
3205   /* Get start indices */
3206   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3207   isstart -= ncols;
3208   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3209 
3210   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3211   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3212   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3213   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3214   for (i=0; i<ncols; i++) {
3215     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3216     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3217     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3218   }
3219   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3220   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3221   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3222 
3223   /* Get iscol_d */
3224   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3225   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3226   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3227 
3228   /* Get isrow_d */
3229   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3230   rstart = mat->rmap->rstart;
3231   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3232   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3233   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3234   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3235 
3236   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3237   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3238   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3239 
3240   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3241   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3242   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3243 
3244   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3245 
3246   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3247   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3248 
3249   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3250   /* off-process column indices */
3251   count = 0;
3252   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3253   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3254 
3255   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3256   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3257   for (i=0; i<Bn; i++) {
3258     if (PetscRealPart(xarray[i]) > -1.0) {
3259       idx[count]     = i;                   /* local column index in off-diagonal part B */
3260       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3261       count++;
3262     }
3263   }
3264   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3265   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3266 
3267   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3268   /* cannot ensure iscol_o has same blocksize as iscol! */
3269 
3270   ierr = PetscFree(idx);CHKERRQ(ierr);
3271   *garray = cmap1;
3272 
3273   ierr = VecDestroy(&x);CHKERRQ(ierr);
3274   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3275   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3280 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3281 {
3282   PetscErrorCode ierr;
3283   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3284   Mat            M = NULL;
3285   MPI_Comm       comm;
3286   IS             iscol_d,isrow_d,iscol_o;
3287   Mat            Asub = NULL,Bsub = NULL;
3288   PetscInt       n;
3289 
3290   PetscFunctionBegin;
3291   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3292 
3293   if (call == MAT_REUSE_MATRIX) {
3294     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3295     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3296     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3297 
3298     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3299     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3300 
3301     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3302     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3303 
3304     /* Update diagonal and off-diagonal portions of submat */
3305     asub = (Mat_MPIAIJ*)(*submat)->data;
3306     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3307     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3308     if (n) {
3309       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3310     }
3311     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3313 
3314   } else { /* call == MAT_INITIAL_MATRIX */
3315     const PetscInt *garray;
3316     PetscInt        BsubN;
3317 
3318     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3319     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3320 
3321     /* Create local submatrices Asub and Bsub */
3322     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3323     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3324 
3325     /* Create submatrix M */
3326     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3327 
3328     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3329     asub = (Mat_MPIAIJ*)M->data;
3330 
3331     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3332     n = asub->B->cmap->N;
3333     if (BsubN > n) {
3334       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3335       const PetscInt *idx;
3336       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3337       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3338 
3339       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3340       j = 0;
3341       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3342       for (i=0; i<n; i++) {
3343         if (j >= BsubN) break;
3344         while (subgarray[i] > garray[j]) j++;
3345 
3346         if (subgarray[i] == garray[j]) {
3347           idx_new[i] = idx[j++];
3348         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3349       }
3350       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3351 
3352       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3353       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3354 
3355     } else if (BsubN < n) {
3356       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than columns of B %D",BsubN,asub->B->cmap->N);
3357     }
3358 
3359     ierr = PetscFree(garray);CHKERRQ(ierr);
3360     *submat = M;
3361 
3362     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3363     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3364     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3365 
3366     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3367     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3368 
3369     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3370     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3371   }
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3376 {
3377   PetscErrorCode ierr;
3378   IS             iscol_local=NULL,isrow_d;
3379   PetscInt       csize;
3380   PetscInt       n,i,j,start,end;
3381   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3382   MPI_Comm       comm;
3383 
3384   PetscFunctionBegin;
3385   /* If isrow has same processor distribution as mat,
3386      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3387   if (call == MAT_REUSE_MATRIX) {
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3389     if (isrow_d) {
3390       sameRowDist  = PETSC_TRUE;
3391       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3392     } else {
3393       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3394       if (iscol_local) {
3395         sameRowDist  = PETSC_TRUE;
3396         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3397       }
3398     }
3399   } else {
3400     /* Check if isrow has same processor distribution as mat */
3401     sameDist[0] = PETSC_FALSE;
3402     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3403     if (!n) {
3404       sameDist[0] = PETSC_TRUE;
3405     } else {
3406       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3407       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3408       if (i >= start && j < end) {
3409         sameDist[0] = PETSC_TRUE;
3410       }
3411     }
3412 
3413     /* Check if iscol has same processor distribution as mat */
3414     sameDist[1] = PETSC_FALSE;
3415     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3416     if (!n) {
3417       sameDist[1] = PETSC_TRUE;
3418     } else {
3419       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3420       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3421       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3422     }
3423 
3424     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3425     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3426     sameRowDist = tsameDist[0];
3427   }
3428 
3429   if (sameRowDist) {
3430     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3431       /* isrow and iscol have same processor distribution as mat */
3432       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3433       PetscFunctionReturn(0);
3434     } else { /* sameRowDist */
3435       /* isrow has same processor distribution as mat */
3436       if (call == MAT_INITIAL_MATRIX) {
3437         PetscBool sorted;
3438         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3439         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3440         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3441         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3442 
3443         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3444         if (sorted) {
3445           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3446           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3447           PetscFunctionReturn(0);
3448         }
3449       } else { /* call == MAT_REUSE_MATRIX */
3450         IS    iscol_sub;
3451         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3452         if (iscol_sub) {
3453           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3454           PetscFunctionReturn(0);
3455         }
3456       }
3457     }
3458   }
3459 
3460   /* General case: iscol -> iscol_local which has global size of iscol */
3461   if (call == MAT_REUSE_MATRIX) {
3462     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3463     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3464   } else {
3465     if (!iscol_local) {
3466       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3467     }
3468   }
3469 
3470   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3471   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3472 
3473   if (call == MAT_INITIAL_MATRIX) {
3474     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3475     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3476   }
3477   PetscFunctionReturn(0);
3478 }
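/*
  Illustrative sketch (not part of this source): extracting a parallel submatrix
  through the public MatCreateSubMatrix() interface, which dispatches to
  MatCreateSubMatrix_MPIAIJ() above; a second call with MAT_REUSE_MATRIX reuses the
  index sets cached on the submatrix. mat, isrow and iscol are assumptions of the
  example.

    Mat sub;
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/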
3479 
3480 /*@C
3481      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3482          and "off-diagonal" part of the matrix in CSR format.
3483 
3484    Collective on MPI_Comm
3485 
3486    Input Parameters:
3487 +  comm - MPI communicator
3488 .  A - "diagonal" portion of matrix
3489 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3490 -  garray - global index of B columns
3491 
3492    Output Parameter:
3493 .   mat - the matrix, with input A as its local diagonal matrix
3494    Level: advanced
3495 
3496    Notes:
3497        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3498        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3499 
3500 .seealso: MatCreateMPIAIJWithSplitArrays()
3501 @*/
3502 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3503 {
3504   PetscErrorCode ierr;
3505   Mat_MPIAIJ     *maij;
3506   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3507   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3508   PetscScalar    *oa=b->a;
3509   Mat            Bnew;
3510   PetscInt       m,n,N;
3511 
3512   PetscFunctionBegin;
3513   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3514   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3515   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3516   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3517   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3518   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3519 
3520   /* Get global columns of mat */
3521   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3522 
3523   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3524   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3525   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3526   maij = (Mat_MPIAIJ*)(*mat)->data;
3527 
3528   (*mat)->preallocated = PETSC_TRUE;
3529 
3530   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3531   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3532 
3533   /* Set A as diagonal portion of *mat */
3534   maij->A = A;
3535 
3536   nz = oi[m];
3537   for (i=0; i<nz; i++) {
3538     col   = oj[i];
3539     oj[i] = garray[col];
3540   }
3541 
3542    /* Set Bnew as off-diagonal portion of *mat */
3543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3544   bnew        = (Mat_SeqAIJ*)Bnew->data;
3545   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3546   maij->B     = Bnew;
3547 
3548   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3549 
3550   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3551   b->free_a       = PETSC_FALSE;
3552   b->free_ij      = PETSC_FALSE;
3553   ierr = MatDestroy(&B);CHKERRQ(ierr);
3554 
3555   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3556   bnew->free_a       = PETSC_TRUE;
3557   bnew->free_ij      = PETSC_TRUE;
3558 
3559   /* condense columns of maij->B */
3560   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3561   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3564   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3565   PetscFunctionReturn(0);
3566 }
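/*
  Illustrative note (not part of this source): the garray argument above maps local
  column indices of B to global column indices of the parallel matrix. For example,
  with garray = {3,7,12} an entry stored in local column 1 of B ends up in global
  column 7 of *mat; the loop over oj[] above performs exactly this renumbering
  before Bnew is assembled.
*/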
3567 
3568 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3569 
3570 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3571 {
3572   PetscErrorCode ierr;
3573   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3574   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3575   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3576   Mat            M,Msub,B=a->B;
3577   MatScalar      *aa;
3578   Mat_SeqAIJ     *aij;
3579   PetscInt       *garray = a->garray,*colsub,Ncols;
3580   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3581   IS             iscol_sub,iscmap;
3582   const PetscInt *is_idx,*cmap;
3583   PetscBool      allcolumns=PETSC_FALSE;
3584   MPI_Comm       comm;
3585 
3586   PetscFunctionBegin;
3587   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3588 
3589   if (call == MAT_REUSE_MATRIX) {
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3591     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3592     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3593 
3594     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3595     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3596 
3597     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3598     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3599 
3600     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3601 
3602   } else { /* call == MAT_INITIAL_MATRIX */
3603     PetscBool flg;
3604 
3605     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3606     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3607 
3608     /* (1) iscol -> nonscalable iscol_local */
3609     /* Check for special case: each processor gets entire matrix columns */
3610     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3611     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3612     if (allcolumns) {
3613       iscol_sub = iscol_local;
3614       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3615       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3616 
3617     } else {
3618       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3619       PetscInt *idx,*cmap1,k;
3620       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3621       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3622       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3623       count = 0;
3624       k     = 0;
3625       for (i=0; i<Ncols; i++) {
3626         j = is_idx[i];
3627         if (j >= cstart && j < cend) {
3628           /* diagonal part of mat */
3629           idx[count]     = j;
3630           cmap1[count++] = i; /* column index in submat */
3631         } else if (Bn) {
3632           /* off-diagonal part of mat */
3633           if (j == garray[k]) {
3634             idx[count]     = j;
3635             cmap1[count++] = i;  /* column index in submat */
3636           } else if (j > garray[k]) {
3637             while (j > garray[k] && k < Bn-1) k++;
3638             if (j == garray[k]) {
3639               idx[count]     = j;
3640               cmap1[count++] = i; /* column index in submat */
3641             }
3642           }
3643         }
3644       }
3645       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3646 
3647       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3648       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3649       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3650 
3651       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3652     }
3653 
3654     /* (3) Create sequential Msub */
3655     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3656   }
3657 
3658   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3659   aij  = (Mat_SeqAIJ*)(Msub)->data;
3660   ii   = aij->i;
3661   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3662 
3663   /*
3664       m - number of local rows
3665       Ncols - number of columns (same on all processors)
3666       rstart - first row in new global matrix generated
3667   */
3668   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3669 
3670   if (call == MAT_INITIAL_MATRIX) {
3671     /* (4) Create parallel newmat */
3672     PetscMPIInt    rank,size;
3673     PetscInt       csize;
3674 
3675     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3676     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677 
3678     /*
3679         Determine the number of non-zeros in the diagonal and off-diagonal
3680         portions of the matrix in order to do correct preallocation
3681     */
3682 
3683     /* first get start and end of "diagonal" columns */
3684     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3685     if (csize == PETSC_DECIDE) {
3686       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3687       if (mglobal == Ncols) { /* square matrix */
3688         nlocal = m;
3689       } else {
3690         nlocal = Ncols/size + ((Ncols % size) > rank);
3691       }
3692     } else {
3693       nlocal = csize;
3694     }
3695     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3696     rstart = rend - nlocal;
3697     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3698 
3699     /* next, compute all the lengths */
3700     jj    = aij->j;
3701     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3702     olens = dlens + m;
3703     for (i=0; i<m; i++) {
3704       jend = ii[i+1] - ii[i];
3705       olen = 0;
3706       dlen = 0;
3707       for (j=0; j<jend; j++) {
3708         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3709         else dlen++;
3710         jj++;
3711       }
3712       olens[i] = olen;
3713       dlens[i] = dlen;
3714     }
3715 
3716     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3717     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3718 
3719     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3720     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3721     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3722     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3723     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3724     ierr = PetscFree(dlens);CHKERRQ(ierr);
3725 
3726   } else { /* call == MAT_REUSE_MATRIX */
3727     M    = *newmat;
3728     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3729     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3731     /*
3732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733        rather than the slower MatSetValues().
3734     */
3735     M->was_assembled = PETSC_TRUE;
3736     M->assembled     = PETSC_FALSE;
3737   }
3738 
3739   /* (5) Set values of Msub to *newmat */
3740   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3741   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3742 
3743   jj   = aij->j;
3744   aa   = aij->a;
3745   for (i=0; i<m; i++) {
3746     row = rstart + i;
3747     nz  = ii[i+1] - ii[i];
3748     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3749     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3750     jj += nz; aa += nz;
3751   }
3752   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3753 
3754   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756 
3757   ierr = PetscFree(colsub);CHKERRQ(ierr);
3758 
3759   /* save Msub, iscol_sub and iscmap used in processor for next request */
3760   if (call ==  MAT_INITIAL_MATRIX) {
3761     *newmat = M;
3762     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3763     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3764 
3765     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3766     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3767 
3768     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3769     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3770 
3771     if (iscol_local) {
3772       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3773       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3774     }
3775   }
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 /*
3780     Not great since it makes two copies of the submatrix: first a SeqAIJ
3781   locally, and then the end result by concatenating the local matrices.
3782   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3783 
3784   Note: This requires a sequential iscol with all indices.
3785 */
3786 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3787 {
3788   PetscErrorCode ierr;
3789   PetscMPIInt    rank,size;
3790   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3791   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3792   Mat            M,Mreuse;
3793   MatScalar      *aa,*vwork;
3794   MPI_Comm       comm;
3795   Mat_SeqAIJ     *aij;
3796   PetscBool      colflag,allcolumns=PETSC_FALSE;
3797 
3798   PetscFunctionBegin;
3799   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3800   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3802 
3803   /* Check for special case: each processor gets entire matrix columns */
3804   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3805   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3806   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3807 
3808   if (call ==  MAT_REUSE_MATRIX) {
3809     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3810     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3811     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3812   } else {
3813     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3814   }
3815 
3816   /*
3817       m - number of local rows
3818       n - number of columns (same on all processors)
3819       rstart - first row in new global matrix generated
3820   */
3821   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3822   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3823   if (call == MAT_INITIAL_MATRIX) {
3824     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3825     ii  = aij->i;
3826     jj  = aij->j;
3827 
3828     /*
3829         Determine the number of non-zeros in the diagonal and off-diagonal
3830         portions of the matrix in order to do correct preallocation
3831     */
3832 
3833     /* first get start and end of "diagonal" columns */
3834     if (csize == PETSC_DECIDE) {
3835       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3836       if (mglobal == n) { /* square matrix */
3837         nlocal = m;
3838       } else {
3839         nlocal = n/size + ((n % size) > rank);
3840       }
3841     } else {
3842       nlocal = csize;
3843     }
3844     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3845     rstart = rend - nlocal;
3846     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3847 
3848     /* next, compute all the lengths */
3849     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3850     olens = dlens + m;
3851     for (i=0; i<m; i++) {
3852       jend = ii[i+1] - ii[i];
3853       olen = 0;
3854       dlen = 0;
3855       for (j=0; j<jend; j++) {
3856         if (*jj < rstart || *jj >= rend) olen++;
3857         else dlen++;
3858         jj++;
3859       }
3860       olens[i] = olen;
3861       dlens[i] = dlen;
3862     }
3863     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3864     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3865     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3866     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3867     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3868     ierr = PetscFree(dlens);CHKERRQ(ierr);
3869   } else {
3870     PetscInt ml,nl;
3871 
3872     M    = *newmat;
3873     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3874     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3875     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3876     /*
3877          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3878        rather than the slower MatSetValues().
3879     */
3880     M->was_assembled = PETSC_TRUE;
3881     M->assembled     = PETSC_FALSE;
3882   }
3883   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3884   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3885   ii   = aij->i;
3886   jj   = aij->j;
3887   aa   = aij->a;
3888   for (i=0; i<m; i++) {
3889     row   = rstart + i;
3890     nz    = ii[i+1] - ii[i];
3891     cwork = jj;     jj += nz;
3892     vwork = aa;     aa += nz;
3893     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3894   }
3895 
3896   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3897   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3898   *newmat = M;
3899 
3900   /* save submatrix used in processor for next request */
3901   if (call ==  MAT_INITIAL_MATRIX) {
3902     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3903     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3904   }
3905   PetscFunctionReturn(0);
3906 }
3907 
3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3909 {
3910   PetscInt       m,cstart, cend,j,nnz,i,d;
3911   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3912   const PetscInt *JJ;
3913   PetscScalar    *values;
3914   PetscErrorCode ierr;
3915   PetscBool      nooffprocentries;
3916 
3917   PetscFunctionBegin;
3918   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3919 
3920   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3921   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3922   m      = B->rmap->n;
3923   cstart = B->cmap->rstart;
3924   cend   = B->cmap->rend;
3925   rstart = B->rmap->rstart;
3926 
3927   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3928 
3929 #if defined(PETSC_USE_DEBUG)
3930   for (i=0; i<m && Ii; i++) {
3931     nnz = Ii[i+1]- Ii[i];
3932     JJ  = J + Ii[i];
3933     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3934     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3935     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3936   }
3937 #endif
3938 
3939   for (i=0; i<m && Ii; i++) {
3940     nnz     = Ii[i+1]- Ii[i];
3941     JJ      = J + Ii[i];
3942     nnz_max = PetscMax(nnz_max,nnz);
3943     d       = 0;
3944     for (j=0; j<nnz; j++) {
3945       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3946     }
3947     d_nnz[i] = d;
3948     o_nnz[i] = nnz - d;
3949   }
3950   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3951   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3952 
3953   if (v) values = (PetscScalar*)v;
3954   else {
3955     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3956   }
3957 
3958   for (i=0; i<m && Ii; i++) {
3959     ii   = i + rstart;
3960     nnz  = Ii[i+1]- Ii[i];
3961     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3962   }
3963   nooffprocentries    = B->nooffprocentries;
3964   B->nooffprocentries = PETSC_TRUE;
3965   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967   B->nooffprocentries = nooffprocentries;
3968 
3969   if (!v) {
3970     ierr = PetscFree(values);CHKERRQ(ierr);
3971   }
3972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 /*@
3977    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3978    (the default parallel PETSc format).
3979 
3980    Collective on MPI_Comm
3981 
3982    Input Parameters:
3983 +  B - the matrix
3984 .  i - the indices into j for the start of each local row (starts with zero)
3985 .  j - the column indices for each local row (starts with zero)
3986 -  v - optional values in the matrix
3987 
3988    Level: developer
3989 
3990    Notes:
3991        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3992      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3993      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3994 
3995        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3996 
3997        The format used for the sparse matrix input is equivalent to a
3998     row-major ordering, i.e., for the following matrix, the input data expected is
3999     as shown:
4000 
4001 $        1 0 0
4002 $        2 0 3     P0
4003 $       -------
4004 $        4 5 6     P1
4005 $
4006 $     Process0 [P0]: rows_owned=[0,1]
4007 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4008 $        j =  {0,0,2}  [size = 3]
4009 $        v =  {1,2,3}  [size = 3]
4010 $
4011 $     Process1 [P1]: rows_owned=[2]
4012 $        i =  {0,3}    [size = nrow+1  = 1+1]
4013 $        j =  {0,1,2}  [size = 3]
4014 $        v =  {4,5,6}  [size = 3]
4015 
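     As an illustrative sketch (error checking omitted), process 0 above could build the
     3x3 matrix as follows, with process 1 making the same calls with its own arrays and 1 local row:

$     Mat         A;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* local CSR structure on process 0 */
$     PetscScalar v[] = {1.0,2.0,3.0};
$
$     MatCreate(comm,&A);
$     MatSetSizes(A,2,PETSC_DECIDE,3,3);          /* 2 of the 3 global rows are local */
$     MatSetType(A,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(A,i,j,v);
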
4016 .keywords: matrix, aij, compressed row, sparse, parallel
4017 
4018 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4019           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4020 @*/
4021 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4022 {
4023   PetscErrorCode ierr;
4024 
4025   PetscFunctionBegin;
4026   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4027   PetscFunctionReturn(0);
4028 }
4029 
4030 /*@C
4031    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4032    (the default parallel PETSc format).  For good matrix assembly performance
4033    the user should preallocate the matrix storage by setting the parameters
4034    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4035    performance can be increased by more than a factor of 50.
4036 
4037    Collective on MPI_Comm
4038 
4039    Input Parameters:
4040 +  B - the matrix
4041 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4042            (same value is used for all local rows)
4043 .  d_nnz - array containing the number of nonzeros in the various rows of the
4044            DIAGONAL portion of the local submatrix (possibly different for each row)
4045            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4046            The size of this array is equal to the number of local rows, i.e 'm'.
4047            For matrices that will be factored, you must leave room for (and set)
4048            the diagonal entry even if it is zero.
4049 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4050            submatrix (same value is used for all local rows).
4051 -  o_nnz - array containing the number of nonzeros in the various rows of the
4052            OFF-DIAGONAL portion of the local submatrix (possibly different for
4053            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4054            structure. The size of this array is equal to the number
4055            of local rows, i.e 'm'.
4056 
4057    If the *_nnz parameter is given then the *_nz parameter is ignored
4058 
4059    The AIJ format (also called the Yale sparse matrix format or
4060    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4061    storage.  The stored row and column indices begin with zero.
4062    See Users-Manual: ch_mat for details.
4063 
4064    The parallel matrix is partitioned such that the first m0 rows belong to
4065    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4066    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4067 
4068    The DIAGONAL portion of the local submatrix of a processor can be defined
4069    as the submatrix which is obtained by extracting the part corresponding to
4070    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4071    first row that belongs to the processor, r2 is the last row belonging to
4072    this processor, and c1-c2 is the range of indices of the local part of a
4073    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4074    common case of a square matrix, the row and column ranges are the same and
4075    the DIAGONAL part is also square. The remaining portion of the local
4076    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4077 
4078    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4079 
4080    You can call MatGetInfo() to get information on how effective the preallocation was;
4081    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4082    You can also run with the option -info and look for messages with the string
4083    malloc in them to see if additional memory allocation was needed.
4084 
4085    Example usage:
4086 
4087    Consider the following 8x8 matrix with 34 non-zero values, that is
4088    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4089    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4090    as follows:
4091 
4092 .vb
4093             1  2  0  |  0  3  0  |  0  4
4094     Proc0   0  5  6  |  7  0  0  |  8  0
4095             9  0 10  | 11  0  0  | 12  0
4096     -------------------------------------
4097            13  0 14  | 15 16 17  |  0  0
4098     Proc1   0 18  0  | 19 20 21  |  0  0
4099             0  0  0  | 22 23  0  | 24  0
4100     -------------------------------------
4101     Proc2  25 26 27  |  0  0 28  | 29  0
4102            30  0  0  | 31 32 33  |  0 34
4103 .ve
4104 
4105    This can be represented as a collection of submatrices as:
4106 
4107 .vb
4108       A B C
4109       D E F
4110       G H I
4111 .ve
4112 
4113    Where the submatrices A,B,C are owned by proc0, D,E,F are
4114    owned by proc1, G,H,I are owned by proc2.
4115 
4116    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4117    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4118    The 'M','N' parameters are 8,8, and have the same values on all procs.
4119 
4120    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4121    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4122    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4123    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4124    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4125    matrix, ans [DF] as another SeqAIJ matrix.
4126 
4127    When d_nz, o_nz parameters are specified, d_nz storage elements are
4128    allocated for every row of the local diagonal submatrix, and o_nz
4129    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4130    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4131    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4132    In this case, the values of d_nz,o_nz are:
4133 .vb
4134      proc0 : dnz = 2, o_nz = 2
4135      proc1 : dnz = 3, o_nz = 2
4136      proc2 : dnz = 1, o_nz = 4
4137 .ve
4138    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4139    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4140    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4141    34 values.
4142 
4143    When d_nnz, o_nnz parameters are specified, the storage is specified
4144    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4145    In the above case the values for d_nnz,o_nnz are:
4146 .vb
4147      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4148      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4149      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4150 .ve
4151    Here the space allocated is the sum of all the above values, i.e., 34, and
4152    hence pre-allocation is perfect.
4153 
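   As a sketch of how these per-row values are passed (error checking omitted, and assuming
   comm spans the 3 processes above), proc0 could set up its part of the 8x8 matrix with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* per-row counts on proc0 */

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
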
4154    Level: intermediate
4155 
4156 .keywords: matrix, aij, compressed row, sparse, parallel
4157 
4158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4159           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4160 @*/
4161 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4162 {
4163   PetscErrorCode ierr;
4164 
4165   PetscFunctionBegin;
4166   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4167   PetscValidType(B,1);
4168   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4169   PetscFunctionReturn(0);
4170 }
4171 
4172 /*@
4173      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4174          CSR format the local rows.
4175 
4176    Collective on MPI_Comm
4177 
4178    Input Parameters:
4179 +  comm - MPI communicator
4180 .  m - number of local rows (Cannot be PETSC_DECIDE)
4181 .  n - This value should be the same as the local size used in creating the
4182        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4183        calculated if N is given). For square matrices n is almost always m.
4184 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4185 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4186 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4187 .   j - column indices
4188 -   a - matrix values
4189 
4190    Output Parameter:
4191 .   mat - the matrix
4192 
4193    Level: intermediate
4194 
4195    Notes:
4196        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4197      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4198      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4199 
4200        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4201 
4202        The format used for the sparse matrix input is equivalent to a
4203     row-major ordering, i.e., for the following matrix, the input data expected is
4204     as shown:
4205 
4206 $        1 0 0
4207 $        2 0 3     P0
4208 $       -------
4209 $        4 5 6     P1
4210 $
4211 $     Process0 [P0]: rows_owned=[0,1]
4212 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4213 $        j =  {0,0,2}  [size = 3]
4214 $        v =  {1,2,3}  [size = 3]
4215 $
4216 $     Process1 [P1]: rows_owned=[2]
4217 $        i =  {0,3}    [size = nrow+1  = 1+1]
4218 $        j =  {0,1,2}  [size = 3]
4219 $        v =  {4,5,6}  [size = 3]
4220 
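     A minimal sketch for process 0 above (process 1 makes the same call with its own
     arrays and 1 local row; error checking omitted):

$     Mat         A;
$     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$     PetscScalar v[] = {1.0,2.0,3.0};
$
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
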
4221 .keywords: matrix, aij, compressed row, sparse, parallel
4222 
4223 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4224           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4225 @*/
4226 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4227 {
4228   PetscErrorCode ierr;
4229 
4230   PetscFunctionBegin;
4231   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4232   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4233   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4234   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4235   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4236   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4237   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4238   PetscFunctionReturn(0);
4239 }
4240 
4241 /*@C
4242    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4243    (the default parallel PETSc format).  For good matrix assembly performance
4244    the user should preallocate the matrix storage by setting the parameters
4245    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4246    performance can be increased by more than a factor of 50.
4247 
4248    Collective on MPI_Comm
4249 
4250    Input Parameters:
4251 +  comm - MPI communicator
4252 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4253            This value should be the same as the local size used in creating the
4254            y vector for the matrix-vector product y = Ax.
4255 .  n - This value should be the same as the local size used in creating the
4256        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4257        calculated if N is given). For square matrices n is almost always m.
4258 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4259 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4260 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4261            (same value is used for all local rows)
4262 .  d_nnz - array containing the number of nonzeros in the various rows of the
4263            DIAGONAL portion of the local submatrix (possibly different for each row)
4264            or NULL, if d_nz is used to specify the nonzero structure.
4265            The size of this array is equal to the number of local rows, i.e 'm'.
4266 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4267            submatrix (same value is used for all local rows).
4268 -  o_nnz - array containing the number of nonzeros in the various rows of the
4269            OFF-DIAGONAL portion of the local submatrix (possibly different for
4270            each row) or NULL, if o_nz is used to specify the nonzero
4271            structure. The size of this array is equal to the number
4272            of local rows, i.e 'm'.
4273 
4274    Output Parameter:
4275 .  A - the matrix
4276 
4277    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4278    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4279    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4280 
4281    Notes:
4282    If the *_nnz parameter is given then the *_nz parameter is ignored
4283 
4284    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4285    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4286    storage requirements for this matrix.
4287 
4288    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4289    processor then it must be used on all processors that share the object for
4290    that argument.
4291 
4292    The user MUST specify either the local or global matrix dimensions
4293    (possibly both).
4294 
4295    The parallel matrix is partitioned across processors such that the
4296    first m0 rows belong to process 0, the next m1 rows belong to
4297    process 1, the next m2 rows belong to process 2, etc., where
4298    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4299    values corresponding to an [m x N] submatrix.
4300 
4301    The columns are logically partitioned with the n0 columns belonging
4302    to 0th partition, the next n1 columns belonging to the next
4303    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4304 
4305    The DIAGONAL portion of the local submatrix on any given processor
4306    is the submatrix corresponding to the rows and columns m,n owned by
4307    the given processor, i.e., the diagonal matrix on
4308    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4309    etc. The remaining portion of the local submatrix [m x (N-n)]
4310    constitutes the OFF-DIAGONAL portion. The example below better
4311    illustrates this concept.
4312 
4313    For a square global matrix we define each processor's diagonal portion
4314    to be its local rows and the corresponding columns (a square submatrix);
4315    each processor's off-diagonal portion encompasses the remainder of the
4316    local matrix (a rectangular submatrix).
4317 
4318    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4319 
4320    When calling this routine with a single process communicator, a matrix of
4321    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4322    type of communicator, use the construction mechanism
4323 .vb
4324      MatCreate(...,&A);
4325      MatSetType(A,MATMPIAIJ);
4326      MatSetSizes(A, m,n,M,N);
4327      MatMPIAIJSetPreallocation(A,...);
4328 .ve
4331 
4332    By default, this format uses inodes (identical nodes) when possible.
4333    We search for consecutive rows with the same nonzero structure, thereby
4334    reusing matrix information to achieve increased efficiency.
4335 
4336    Options Database Keys:
4337 +  -mat_no_inode  - Do not use inodes
4338 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4339 
4340 
4341 
4342    Example usage:
4343 
4344    Consider the following 8x8 matrix with 34 non-zero values, that is
4345    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4346    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4347    as follows
4348 
4349 .vb
4350             1  2  0  |  0  3  0  |  0  4
4351     Proc0   0  5  6  |  7  0  0  |  8  0
4352             9  0 10  | 11  0  0  | 12  0
4353     -------------------------------------
4354            13  0 14  | 15 16 17  |  0  0
4355     Proc1   0 18  0  | 19 20 21  |  0  0
4356             0  0  0  | 22 23  0  | 24  0
4357     -------------------------------------
4358     Proc2  25 26 27  |  0  0 28  | 29  0
4359            30  0  0  | 31 32 33  |  0 34
4360 .ve
4361 
4362    This can be represented as a collection of submatrices as
4363 
4364 .vb
4365       A B C
4366       D E F
4367       G H I
4368 .ve
4369 
4370    Where the submatrices A,B,C are owned by proc0, D,E,F are
4371    owned by proc1, G,H,I are owned by proc2.
4372 
4373    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4374    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4375    The 'M','N' parameters are 8,8, and have the same values on all procs.
4376 
4377    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4378    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4379    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4380    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4381    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4382    matrix, ans [DF] as another SeqAIJ matrix.
4383 
4384    When d_nz, o_nz parameters are specified, d_nz storage elements are
4385    allocated for every row of the local diagonal submatrix, and o_nz
4386    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4387    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4388    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4389    In this case, the values of d_nz,o_nz are
4390 .vb
4391      proc0 : dnz = 2, o_nz = 2
4392      proc1 : dnz = 3, o_nz = 2
4393      proc2 : dnz = 1, o_nz = 4
4394 .ve
4395    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4396    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4397    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4398    34 values.
4399 
4400    When d_nnz, o_nnz parameters are specified, the storage is specified
4401    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4402    In the above case the values for d_nnz,o_nnz are
4403 .vb
4404      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4405      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4406      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4407 .ve
4408    Here the space allocated is the sum of all the above values, i.e., 34, and
4409    hence pre-allocation is perfect.
4410 
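   With the d_nnz/o_nnz values above, proc0 could create its share of the 8x8 matrix in a
   single call (a sketch; each process passes its own local sizes and arrays):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
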
4411    Level: intermediate
4412 
4413 .keywords: matrix, aij, compressed row, sparse, parallel
4414 
4415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4416           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4417 @*/
4418 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4419 {
4420   PetscErrorCode ierr;
4421   PetscMPIInt    size;
4422 
4423   PetscFunctionBegin;
4424   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4425   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4427   if (size > 1) {
4428     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4429     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4430   } else {
4431     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4432     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4433   }
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4438 {
4439   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4440   PetscBool      flg;
4441   PetscErrorCode ierr;
4442 
4443   PetscFunctionBegin;
4444   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4445   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4446   if (Ad)     *Ad     = a->A;
4447   if (Ao)     *Ao     = a->B;
4448   if (colmap) *colmap = a->garray;
4449   PetscFunctionReturn(0);
4450 }
4451 
4452 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4453 {
4454   PetscErrorCode ierr;
4455   PetscInt       m,N,i,rstart,nnz,Ii;
4456   PetscInt       *indx;
4457   PetscScalar    *values;
4458 
4459   PetscFunctionBegin;
4460   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4461   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4462     PetscInt       *dnz,*onz,sum,bs,cbs;
4463 
4464     if (n == PETSC_DECIDE) {
4465       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4466     }
4467     /* Check sum(n) = N */
4468     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4469     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4470 
4471     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4472     rstart -= m;
4473 
4474     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4475     for (i=0; i<m; i++) {
4476       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4477       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4478       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4479     }
4480 
4481     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4482     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4483     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4484     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4485     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4486     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4487     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4488     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4489   }
4490 
4491   /* numeric phase */
4492   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4493   for (i=0; i<m; i++) {
4494     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4495     Ii   = i + rstart;
4496     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4497     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4498   }
4499   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501   PetscFunctionReturn(0);
4502 }
4503 
4504 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4505 {
4506   PetscErrorCode    ierr;
4507   PetscMPIInt       rank;
4508   PetscInt          m,N,i,rstart,nnz;
4509   size_t            len;
4510   const PetscInt    *indx;
4511   PetscViewer       out;
4512   char              *name;
4513   Mat               B;
4514   const PetscScalar *values;
4515 
4516   PetscFunctionBegin;
4517   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4518   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4519   /* Should this be the type of the diagonal block of A? */
4520   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4521   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4522   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4523   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4524   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4525   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4526   for (i=0; i<m; i++) {
4527     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4528     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4529     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4530   }
4531   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4532   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4533 
4534   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4535   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4536   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4537   sprintf(name,"%s.%d",outfile,rank);
4538   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4539   ierr = PetscFree(name);CHKERRQ(ierr);
4540   ierr = MatView(B,out);CHKERRQ(ierr);
4541   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4542   ierr = MatDestroy(&B);CHKERRQ(ierr);
4543   PetscFunctionReturn(0);
4544 }
4545 
4546 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4547 {
4548   PetscErrorCode      ierr;
4549   Mat_Merge_SeqsToMPI *merge;
4550   PetscContainer      container;
4551 
4552   PetscFunctionBegin;
4553   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4554   if (container) {
4555     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4560     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4561     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4562     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4563     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4564     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4565     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4566     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4567     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4568     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4569     ierr = PetscFree(merge);CHKERRQ(ierr);
4570     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4571   }
4572   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4573   PetscFunctionReturn(0);
4574 }
4575 
4576 #include <../src/mat/utils/freespace.h>
4577 #include <petscbt.h>
4578 
4579 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4580 {
4581   PetscErrorCode      ierr;
4582   MPI_Comm            comm;
4583   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4584   PetscMPIInt         size,rank,taga,*len_s;
4585   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4586   PetscInt            proc,m;
4587   PetscInt            **buf_ri,**buf_rj;
4588   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4589   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4590   MPI_Request         *s_waits,*r_waits;
4591   MPI_Status          *status;
4592   MatScalar           *aa=a->a;
4593   MatScalar           **abuf_r,*ba_i;
4594   Mat_Merge_SeqsToMPI *merge;
4595   PetscContainer      container;
4596 
4597   PetscFunctionBegin;
4598   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4599   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4600 
4601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4602   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4603 
4604   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4605   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4606 
4607   bi     = merge->bi;
4608   bj     = merge->bj;
4609   buf_ri = merge->buf_ri;
4610   buf_rj = merge->buf_rj;
4611 
4612   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4613   owners = merge->rowmap->range;
4614   len_s  = merge->len_s;
4615 
4616   /* send and recv matrix values */
4617   /*-----------------------------*/
4618   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4619   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4620 
4621   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4622   for (proc=0,k=0; proc<size; proc++) {
4623     if (!len_s[proc]) continue;
4624     i    = owners[proc];
4625     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4626     k++;
4627   }
4628 
4629   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4630   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4631   ierr = PetscFree(status);CHKERRQ(ierr);
4632 
4633   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4634   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4635 
4636   /* insert mat values of mpimat */
4637   /*----------------------------*/
4638   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4639   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4640 
4641   for (k=0; k<merge->nrecv; k++) {
4642     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4643     nrows       = *(buf_ri_k[k]);
4644     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4645     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4646   }
4647 
4648   /* set values of ba */
4649   m = merge->rowmap->n;
4650   for (i=0; i<m; i++) {
4651     arow = owners[rank] + i;
4652     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4653     bnzi = bi[i+1] - bi[i];
4654     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4655 
4656     /* add local non-zero vals of this proc's seqmat into ba */
4657     anzi   = ai[arow+1] - ai[arow];
4658     aj     = a->j + ai[arow];
4659     aa     = a->a + ai[arow];
4660     nextaj = 0;
4661     for (j=0; nextaj<anzi; j++) {
4662       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4663         ba_i[j] += aa[nextaj++];
4664       }
4665     }
4666 
4667     /* add received vals into ba */
4668     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4669       /* i-th row */
4670       if (i == *nextrow[k]) {
4671         anzi   = *(nextai[k]+1) - *nextai[k];
4672         aj     = buf_rj[k] + *(nextai[k]);
4673         aa     = abuf_r[k] + *(nextai[k]);
4674         nextaj = 0;
4675         for (j=0; nextaj<anzi; j++) {
4676           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4677             ba_i[j] += aa[nextaj++];
4678           }
4679         }
4680         nextrow[k]++; nextai[k]++;
4681       }
4682     }
4683     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4684   }
4685   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687 
4688   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4689   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4690   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4691   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4692   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4693   PetscFunctionReturn(0);
4694 }
4695 
4696 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4697 {
4698   PetscErrorCode      ierr;
4699   Mat                 B_mpi;
4700   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4701   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4702   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4703   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4704   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4705   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4706   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4707   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4708   MPI_Status          *status;
4709   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4710   PetscBT             lnkbt;
4711   Mat_Merge_SeqsToMPI *merge;
4712   PetscContainer      container;
4713 
4714   PetscFunctionBegin;
4715   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4716 
4717   /* make sure it is a PETSc comm */
4718   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4719   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4720   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4721 
4722   ierr = PetscNew(&merge);CHKERRQ(ierr);
4723   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4724 
4725   /* determine row ownership */
4726   /*---------------------------------------------------------*/
4727   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4728   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4729   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4730   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4731   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4732   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4733   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4734 
4735   m      = merge->rowmap->n;
4736   owners = merge->rowmap->range;
4737 
4738   /* determine the number of messages to send, their lengths */
4739   /*---------------------------------------------------------*/
4740   len_s = merge->len_s;
4741 
4742   len          = 0; /* length of buf_si[] */
4743   merge->nsend = 0;
4744   for (proc=0; proc<size; proc++) {
4745     len_si[proc] = 0;
4746     if (proc == rank) {
4747       len_s[proc] = 0;
4748     } else {
4749       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4750       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4751     }
4752     if (len_s[proc]) {
4753       merge->nsend++;
4754       nrows = 0;
4755       for (i=owners[proc]; i<owners[proc+1]; i++) {
4756         if (ai[i+1] > ai[i]) nrows++;
4757       }
4758       len_si[proc] = 2*(nrows+1);
4759       len         += len_si[proc];
4760     }
4761   }
4762 
4763   /* determine the number and length of messages to receive for ij-structure */
4764   /*-------------------------------------------------------------------------*/
4765   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4766   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4767 
4768   /* post the Irecv of j-structure */
4769   /*-------------------------------*/
4770   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4771   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4772 
4773   /* post the Isend of j-structure */
4774   /*--------------------------------*/
4775   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4776 
4777   for (proc=0, k=0; proc<size; proc++) {
4778     if (!len_s[proc]) continue;
4779     i    = owners[proc];
4780     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4781     k++;
4782   }
4783 
4784   /* receives and sends of j-structure are complete */
4785   /*------------------------------------------------*/
4786   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4787   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4788 
4789   /* send and recv i-structure */
4790   /*---------------------------*/
4791   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4792   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4793 
4794   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4795   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4796   for (proc=0,k=0; proc<size; proc++) {
4797     if (!len_s[proc]) continue;
4798     /* form outgoing message for i-structure:
4799          buf_si[0]:                 nrows to be sent
4800                [1:nrows]:           row index (local to the receiving process)
4801                [nrows+1:2*nrows+1]: i-structure index
4802     */
4803     /*-------------------------------------------*/
4804     nrows       = len_si[proc]/2 - 1;
4805     buf_si_i    = buf_si + nrows+1;
4806     buf_si[0]   = nrows;
4807     buf_si_i[0] = 0;
4808     nrows       = 0;
4809     for (i=owners[proc]; i<owners[proc+1]; i++) {
4810       anzi = ai[i+1] - ai[i];
4811       if (anzi) {
4812         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4813         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4814         nrows++;
4815       }
4816     }
4817     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4818     k++;
4819     buf_si += len_si[proc];
4820   }
4821 
4822   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4823   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4824 
4825   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4826   for (i=0; i<merge->nrecv; i++) {
4827     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4828   }
4829 
4830   ierr = PetscFree(len_si);CHKERRQ(ierr);
4831   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4832   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4833   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4834   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4835   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4836   ierr = PetscFree(status);CHKERRQ(ierr);
4837 
4838   /* compute a local seq matrix in each processor */
4839   /*----------------------------------------------*/
4840   /* allocate bi array and free space for accumulating nonzero column info */
4841   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4842   bi[0] = 0;
4843 
4844   /* create and initialize a linked list */
4845   nlnk = N+1;
4846   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4847 
4848   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4849   len  = ai[owners[rank+1]] - ai[owners[rank]];
4850   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4851 
4852   current_space = free_space;
4853 
4854   /* determine symbolic info for each local row */
4855   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4856 
4857   for (k=0; k<merge->nrecv; k++) {
4858     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4859     nrows       = *buf_ri_k[k];
4860     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4861     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4862   }
4863 
4864   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4865   len  = 0;
4866   for (i=0; i<m; i++) {
4867     bnzi = 0;
4868     /* add local non-zero cols of this proc's seqmat into lnk */
4869     arow  = owners[rank] + i;
4870     anzi  = ai[arow+1] - ai[arow];
4871     aj    = a->j + ai[arow];
4872     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4873     bnzi += nlnk;
4874     /* add received col data into lnk */
4875     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4876       if (i == *nextrow[k]) { /* i-th row */
4877         anzi  = *(nextai[k]+1) - *nextai[k];
4878         aj    = buf_rj[k] + *nextai[k];
4879         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4880         bnzi += nlnk;
4881         nextrow[k]++; nextai[k]++;
4882       }
4883     }
4884     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4885 
4886     /* if free space is not available, make more free space */
4887     if (current_space->local_remaining<bnzi) {
4888       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4889       nspacedouble++;
4890     }
4891     /* copy data into free space, then initialize lnk */
4892     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4893     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4894 
4895     current_space->array           += bnzi;
4896     current_space->local_used      += bnzi;
4897     current_space->local_remaining -= bnzi;
4898 
4899     bi[i+1] = bi[i] + bnzi;
4900   }
4901 
4902   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4903 
4904   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4905   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4906   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4907 
4908   /* create symbolic parallel matrix B_mpi */
4909   /*---------------------------------------*/
4910   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4911   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4912   if (n==PETSC_DECIDE) {
4913     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4914   } else {
4915     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4916   }
4917   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4918   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4919   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4920   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4921   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4922 
4923   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4924   B_mpi->assembled    = PETSC_FALSE;
4925   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4926   merge->bi           = bi;
4927   merge->bj           = bj;
4928   merge->buf_ri       = buf_ri;
4929   merge->buf_rj       = buf_rj;
4930   merge->coi          = NULL;
4931   merge->coj          = NULL;
4932   merge->owners_co    = NULL;
4933 
4934   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4935 
4936   /* attach the supporting struct to B_mpi for reuse */
4937   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4938   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4939   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4940   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4941   *mpimat = B_mpi;
4942 
4943   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4944   PetscFunctionReturn(0);
4945 }
4946 
4947 /*@C
4948       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4949                  matrices from each processor
4950 
4951     Collective on MPI_Comm
4952 
4953    Input Parameters:
4954 +    comm - the communicator the parallel matrix will live on
4955 .    seqmat - the input sequential matrices
4956 .    m - number of local rows (or PETSC_DECIDE)
4957 .    n - number of local columns (or PETSC_DECIDE)
4958 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4959 
4960    Output Parameter:
4961 .    mpimat - the parallel matrix generated
4962 
4963     Level: advanced
4964 
4965    Notes:
4966      The dimensions of the sequential matrix in each processor MUST be the same.
4967      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4968      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
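
     A typical first call and a later reuse with the same nonzero pattern might look like
     (a sketch; each process passes its own sequential matrix seqmat):

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve
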
4969 @*/
4970 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4971 {
4972   PetscErrorCode ierr;
4973   PetscMPIInt    size;
4974 
4975   PetscFunctionBegin;
4976   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4977   if (size == 1) {
4978     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4979     if (scall == MAT_INITIAL_MATRIX) {
4980       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4981     } else {
4982       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4983     }
4984     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4985     PetscFunctionReturn(0);
4986   }
4987   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4988   if (scall == MAT_INITIAL_MATRIX) {
4989     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4990   }
4991   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4992   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 /*@
4997      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4998           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4999           with MatGetSize()
5000 
5001     Not Collective
5002 
5003    Input Parameters:
5004 +    A - the matrix
5005 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5006 
5007    Output Parameter:
5008 .    A_loc - the local sequential matrix generated
5009 
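   A typical use is sketched below; with MAT_INITIAL_MATRIX the returned A_loc is a new
   matrix that the caller destroys when it is no longer needed (an illustrative sketch):

.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... operate on the local rows of A through A_loc ... */
     MatDestroy(&A_loc);
.ve
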
5010     Level: developer
5011 
5012 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5013 
5014 @*/
5015 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5016 {
5017   PetscErrorCode ierr;
5018   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5019   Mat_SeqAIJ     *mat,*a,*b;
5020   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5021   MatScalar      *aa,*ba,*cam;
5022   PetscScalar    *ca;
5023   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5024   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5025   PetscBool      match;
5026   MPI_Comm       comm;
5027   PetscMPIInt    size;
5028 
5029   PetscFunctionBegin;
5030   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5031   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5032   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5034   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5035 
5036   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5037   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5038   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5039   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5040   aa = a->a; ba = b->a;
5041   if (scall == MAT_INITIAL_MATRIX) {
5042     if (size == 1) {
5043       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
           ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5044       PetscFunctionReturn(0);
5045     }
5046 
5047     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5048     ci[0] = 0;
5049     for (i=0; i<am; i++) {
5050       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5051     }
5052     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5053     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5054     k    = 0;
5055     for (i=0; i<am; i++) {
5056       ncols_o = bi[i+1] - bi[i];
5057       ncols_d = ai[i+1] - ai[i];
5058       /* off-diagonal portion of A: columns with global index < cstart */
5059       for (jo=0; jo<ncols_o; jo++) {
5060         col = cmap[*bj];
5061         if (col >= cstart) break;
5062         cj[k]   = col; bj++;
5063         ca[k++] = *ba++;
5064       }
5065       /* diagonal portion of A */
5066       for (j=0; j<ncols_d; j++) {
5067         cj[k]   = cstart + *aj++;
5068         ca[k++] = *aa++;
5069       }
5070       /* off-diagonal portion of A: remaining columns, global index >= cend */
5071       for (j=jo; j<ncols_o; j++) {
5072         cj[k]   = cmap[*bj++];
5073         ca[k++] = *ba++;
5074       }
5075     }
5076     /* put together the new matrix */
5077     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5078     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5079     /* Since these are PETSc arrays, change flags to free them as necessary. */
5080     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5081     mat->free_a  = PETSC_TRUE;
5082     mat->free_ij = PETSC_TRUE;
5083     mat->nonew   = 0;
5084   } else if (scall == MAT_REUSE_MATRIX) {
5085     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5086     ci = mat->i; cj = mat->j; cam = mat->a;
5087     for (i=0; i<am; i++) {
5088       /* off-diagonal portion of A: columns with global index < cstart */
5089       ncols_o = bi[i+1] - bi[i];
5090       for (jo=0; jo<ncols_o; jo++) {
5091         col = cmap[*bj];
5092         if (col >= cstart) break;
5093         *cam++ = *ba++; bj++;
5094       }
5095       /* diagonal portion of A */
5096       ncols_d = ai[i+1] - ai[i];
5097       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5098       /* off-diagonal portion of A: remaining columns, global index >= cend */
5099       for (j=jo; j<ncols_o; j++) {
5100         *cam++ = *ba++; bj++;
5101       }
5102     }
5103   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5104   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5105   PetscFunctionReturn(0);
5106 }
5107 
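/*
   Example usage of MatMPIAIJGetLocalMat() (a sketch kept as a comment, not compiled): A is assumed
   to be an assembled MATMPIAIJ matrix. The MAT_REUSE_MATRIX call refreshes the values of A_loc after
   the entries of A have changed while its nonzero pattern stayed the same.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc, modify values of A ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/
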
5108 /*@C
5109      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5110 
5111     Not Collective
5112 
5113    Input Parameters:
5114 +    A - the matrix
5115 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116 -    row, col - index sets of rows and columns to extract (or NULL)
5117 
5118    Output Parameter:
5119 .    A_loc - the local sequential matrix generated
5120 
5121     Level: developer
5122 
5123 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5124 
5125 @*/
5126 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5127 {
5128   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5129   PetscErrorCode ierr;
5130   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5131   IS             isrowa,iscola;
5132   Mat            *aloc;
5133   PetscBool      match;
5134 
5135   PetscFunctionBegin;
5136   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5137   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5138   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5139   if (!row) {
5140     start = A->rmap->rstart; end = A->rmap->rend;
5141     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5142   } else {
5143     isrowa = *row;
5144   }
5145   if (!col) {
5146     start = A->cmap->rstart;
5147     cmap  = a->garray;
5148     nzA   = a->A->cmap->n;
5149     nzB   = a->B->cmap->n;
5150     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5151     ncols = 0;
5152     for (i=0; i<nzB; i++) {
5153       if (cmap[i] < start) idx[ncols++] = cmap[i];
5154       else break;
5155     }
5156     imark = i;
5157     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5158     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5159     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5160   } else {
5161     iscola = *col;
5162   }
5163   if (scall != MAT_INITIAL_MATRIX) {
5164     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5165     aloc[0] = *A_loc;
5166   }
5167   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5168   if (!col) { /* attach global id of condensed columns */
5169     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5170   }
5171   *A_loc = aloc[0];
5172   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5173   if (!row) {
5174     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5175   }
5176   if (!col) {
5177     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5178   }
5179   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5180   PetscFunctionReturn(0);
5181 }
5182 
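/*
   Example usage of MatMPIAIJGetLocalMatCondensed() (a sketch kept as a comment, not compiled):
   passing NULL for row and col extracts all local rows and only the columns containing nonzeros,
   which is the common case.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/
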
5183 /*@C
5184     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5185 
5186     Collective on Mat
5187 
5188    Input Parameters:
5189 +    A,B - the matrices in mpiaij format
5190 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5192 
5193    Output Parameters:
5194 +    rowb, colb - index sets of rows and columns of B to extract
5195 -    B_seq - the sequential matrix generated
5196 
5197     Level: developer
5198 
5199 @*/
5200 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5201 {
5202   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5203   PetscErrorCode ierr;
5204   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5205   IS             isrowb,iscolb;
5206   Mat            *bseq=NULL;
5207 
5208   PetscFunctionBegin;
5209   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5210     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5211   }
5212   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5213 
5214   if (scall == MAT_INITIAL_MATRIX) {
5215     start = A->cmap->rstart;
5216     cmap  = a->garray;
5217     nzA   = a->A->cmap->n;
5218     nzB   = a->B->cmap->n;
5219     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5220     ncols = 0;
5221     for (i=0; i<nzB; i++) {  /* row < local row index */
5222       if (cmap[i] < start) idx[ncols++] = cmap[i];
5223       else break;
5224     }
5225     imark = i;
5226     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5227     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5228     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5229     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5230   } else {
5231     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5232     isrowb  = *rowb; iscolb = *colb;
5233     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5234     bseq[0] = *B_seq;
5235   }
5236   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5237   *B_seq = bseq[0];
5238   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5239   if (!rowb) {
5240     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5241   } else {
5242     *rowb = isrowb;
5243   }
5244   if (!colb) {
5245     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5246   } else {
5247     *colb = iscolb;
5248   }
5249   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5250   PetscFunctionReturn(0);
5251 }
5252 
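/*
   Example usage of MatGetBrowsOfAcols() (a sketch kept as a comment, not compiled): A and B are
   assumed to be MATMPIAIJ matrices with compatible layouts. On the first call the routine builds the
   row and column index sets and returns them through rowb and colb, so that a later MAT_REUSE_MATRIX
   call can reuse them; the caller destroys them when done.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... modify values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/
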
5253 /*
5254     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5255     of the OFF-DIAGONAL portion of the local part of A
5256 
5257     Collective on Mat
5258 
5259    Input Parameters:
5260 +    A,B - the matrices in mpiaij format
5261 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5262 
5263    Output Parameters:
5264 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5265 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5266 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5267 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5268 
5269     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5270      for this matrix. This is not desirable.
5271 
5272     Level: developer
5273 
5274 */
5275 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5276 {
5277   PetscErrorCode         ierr;
5278   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5279   Mat_SeqAIJ             *b_oth;
5280   VecScatter             ctx;
5281   MPI_Comm               comm;
5282   const PetscMPIInt      *rprocs,*sprocs;
5283   const PetscInt         *srow,*rstarts,*sstarts;
5284   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5285   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5286   PetscScalar              *b_otha,*bufa,*bufA,*vals;
5287   MPI_Request            *rwaits = NULL,*swaits = NULL;
5288   MPI_Status             rstatus;
5289   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5290 
5291   PetscFunctionBegin;
5292   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5293   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5294 
5295   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5296     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5297   }
5298   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5299   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5300 
5301   if (size == 1) {
5302     startsj_s = NULL;
5303     bufa_ptr  = NULL;
5304     *B_oth    = NULL;
         ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5305     PetscFunctionReturn(0);
5306   }
5307 
5308   ctx = a->Mvctx;
5309   tag = ((PetscObject)ctx)->tag;
5310 
5311   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5312   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5313   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5314   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5315   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5316   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5317   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5318 
5319   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5320   if (scall == MAT_INITIAL_MATRIX) {
5321     /* i-array */
5322     /*---------*/
5323     /*  post receives */
5324     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5325     for (i=0; i<nrecvs; i++) {
5326       rowlen = rvalues + rstarts[i]*rbs;
5327       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5328       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* pack the outgoing message */
5332     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5333 
5334     sstartsj[0] = 0;
5335     rstartsj[0] = 0;
5336     len         = 0; /* total length of j or a array to be sent */
5337     if (nsends) {
5338       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5339       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5340     }
5341     for (i=0; i<nsends; i++) {
5342       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5343       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5344       for (j=0; j<nrows; j++) {
5345         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5346         for (l=0; l<sbs; l++) {
5347           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5348 
5349           rowlen[j*sbs+l] = ncols;
5350 
5351           len += ncols;
5352           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5353         }
5354         k++;
5355       }
5356       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5357 
5358       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5359     }
5360     /* recvs and sends of i-array are completed */
5361     i = nrecvs;
5362     while (i--) {
5363       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5364     }
5365     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5366     ierr = PetscFree(svalues);CHKERRQ(ierr);
5367 
5368     /* allocate buffers for sending j and a arrays */
5369     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5370     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5371 
5372     /* create i-array of B_oth */
5373     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5374 
5375     b_othi[0] = 0;
5376     len       = 0; /* total length of j or a array to be received */
5377     k         = 0;
5378     for (i=0; i<nrecvs; i++) {
5379       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5380       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5381       for (j=0; j<nrows; j++) {
5382         b_othi[k+1] = b_othi[k] + rowlen[j];
5383         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5384         k++;
5385       }
5386       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5387     }
5388     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5389 
5390     /* allocate space for j and a arrays of B_oth */
5391     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5392     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5393 
5394     /* j-array */
5395     /*---------*/
5396     /*  post receives of j-array */
5397     for (i=0; i<nrecvs; i++) {
5398       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5399       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5400     }
5401 
5402     /* pack the outgoing message j-array */
5403     if (nsends) k = sstarts[0];
5404     for (i=0; i<nsends; i++) {
5405       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5406       bufJ  = bufj+sstartsj[i];
5407       for (j=0; j<nrows; j++) {
5408         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5409         for (ll=0; ll<sbs; ll++) {
5410           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5411           for (l=0; l<ncols; l++) {
5412             *bufJ++ = cols[l];
5413           }
5414           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5415         }
5416       }
5417       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5418     }
5419 
5420     /* recvs and sends of j-array are completed */
5421     i = nrecvs;
5422     while (i--) {
5423       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5424     }
5425     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5426   } else if (scall == MAT_REUSE_MATRIX) {
5427     sstartsj = *startsj_s;
5428     rstartsj = *startsj_r;
5429     bufa     = *bufa_ptr;
5430     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5431     b_otha   = b_oth->a;
5432   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5433 
5434   /* a-array */
5435   /*---------*/
5436   /*  post receives of a-array */
5437   for (i=0; i<nrecvs; i++) {
5438     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5439     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5440   }
5441 
5442   /* pack the outgoing message a-array */
5443   if (nsends) k = sstarts[0];
5444   for (i=0; i<nsends; i++) {
5445     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5446     bufA  = bufa+sstartsj[i];
5447     for (j=0; j<nrows; j++) {
5448       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5449       for (ll=0; ll<sbs; ll++) {
5450         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5451         for (l=0; l<ncols; l++) {
5452           *bufA++ = vals[l];
5453         }
5454         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5455       }
5456     }
5457     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5458   }
5459   /* recvs and sends of a-array are completed */
5460   i = nrecvs;
5461   while (i--) {
5462     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5463   }
5464   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5465   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5466 
5467   if (scall == MAT_INITIAL_MATRIX) {
5468     /* put together the new matrix */
5469     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5470 
5471     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5472     /* Since these are PETSc arrays, change flags to free them as necessary. */
5473     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5474     b_oth->free_a  = PETSC_TRUE;
5475     b_oth->free_ij = PETSC_TRUE;
5476     b_oth->nonew   = 0;
5477 
5478     ierr = PetscFree(bufj);CHKERRQ(ierr);
5479     if (!startsj_s || !bufa_ptr) {
5480       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5481       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5482     } else {
5483       *startsj_s = sstartsj;
5484       *startsj_r = rstartsj;
5485       *bufa_ptr  = bufa;
5486     }
5487   }
5488 
5489   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5490   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5491   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5492   PetscFunctionReturn(0);
5493 }
5494 
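/*
   Example usage of MatGetBrowsOfAoCols_MPIAIJ() (a sketch kept as a comment, not compiled): the
   startsj_s, startsj_r, and bufa buffers produced by the MAT_INITIAL_MATRIX call are assumed to be
   passed back unchanged on MAT_REUSE_MATRIX calls and to be freed by the caller afterwards, which is
   how the matrix-matrix multiplication routines use this helper.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... modify values of B, keeping its nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/
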
5495 /*@C
5496   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5497 
5498   Not Collective
5499 
5500   Input Parameter:
5501 . A - The matrix in mpiaij format
5502 
5503   Output Parameters:
5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5505 . colmap - A map from global column index to local index into lvec
5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5507 
5508   Level: developer
5509 
5510 @*/
5511 #if defined(PETSC_USE_CTABLE)
5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5513 #else
5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5515 #endif
5516 {
5517   Mat_MPIAIJ *a;
5518 
5519   PetscFunctionBegin;
5520   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5521   PetscValidPointer(lvec, 2);
5522   PetscValidPointer(colmap, 3);
5523   PetscValidPointer(multScatter, 4);
5524   a = (Mat_MPIAIJ*) A->data;
5525   if (lvec) *lvec = a->lvec;
5526   if (colmap) *colmap = a->colmap;
5527   if (multScatter) *multScatter = a->Mvctx;
5528   PetscFunctionReturn(0);
5529 }
5530 
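/*
   Example usage of MatGetCommunicationStructs() (a sketch kept as a comment, not compiled): A is
   assumed to be an assembled MATMPIAIJ matrix. The returned objects are owned by the matrix and
   must not be destroyed by the caller.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/
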
5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5534 #if defined(PETSC_HAVE_MKL_SPARSE)
5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5536 #endif
5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5538 #if defined(PETSC_HAVE_ELEMENTAL)
5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_HYPRE)
5542 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5543 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5544 #endif
5545 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5546 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5547 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5548 
5549 /*
5550     Computes (B'*A')' since computing B*A directly is untenable
5551 
5552                n                       p                          p
5553         (              )       (              )         (                  )
5554       m (      A       )  *  n (       B      )   =   m (         C        )
5555         (              )       (              )         (                  )
5556 
5557 */
5558 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5559 {
5560   PetscErrorCode ierr;
5561   Mat            At,Bt,Ct;
5562 
5563   PetscFunctionBegin;
5564   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5565   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5566   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5567   ierr = MatDestroy(&At);CHKERRQ(ierr);
5568   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5569   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5570   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5571   PetscFunctionReturn(0);
5572 }
5573 
5574 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5575 {
5576   PetscErrorCode ierr;
5577   PetscInt       m=A->rmap->n,n=B->cmap->n;
5578   Mat            Cmat;
5579 
5580   PetscFunctionBegin;
5581   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5582   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5583   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5584   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5585   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5586   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5587   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5588   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5589 
5590   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5591 
5592   *C = Cmat;
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 /* ----------------------------------------------------------------*/
5597 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5598 {
5599   PetscErrorCode ierr;
5600 
5601   PetscFunctionBegin;
5602   if (scall == MAT_INITIAL_MATRIX) {
5603     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5604     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5605     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5606   }
5607   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5608   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5609   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5610   PetscFunctionReturn(0);
5611 }
5612 
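/*
   Example usage of the MPIDense * MPIAIJ product defined above (a sketch kept as a comment, not
   compiled): A is assumed to be a MATMPIDENSE matrix and B a MATMPIAIJ matrix with matching inner
   dimensions. Callers normally go through the public MatMatMult() interface, which dispatches to
   MatMatMult_MPIDense_MPIAIJ() via the composed function registered in MatCreate_MPIAIJ() below.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... modify values of A or B ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/
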
5613 /*MC
5614    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5615 
5616    Options Database Keys:
5617 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5618 
5619   Level: beginner
5620 
5621 .seealso: MatCreateAIJ()
5622 M*/
5623 
5624 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5625 {
5626   Mat_MPIAIJ     *b;
5627   PetscErrorCode ierr;
5628   PetscMPIInt    size;
5629 
5630   PetscFunctionBegin;
5631   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5632 
5633   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5634   B->data       = (void*)b;
5635   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5636   B->assembled  = PETSC_FALSE;
5637   B->insertmode = NOT_SET_VALUES;
5638   b->size       = size;
5639 
5640   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5641 
5642   /* build cache for off array entries formed */
5643   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5644 
5645   b->donotstash  = PETSC_FALSE;
5646   b->colmap      = 0;
5647   b->garray      = 0;
5648   b->roworiented = PETSC_TRUE;
5649 
5650   /* stuff used for matrix vector multiply */
5651   b->lvec  = NULL;
5652   b->Mvctx = NULL;
5653 
5654   /* stuff for MatGetRow() */
5655   b->rowindices   = 0;
5656   b->rowvalues    = 0;
5657   b->getrowactive = PETSC_FALSE;
5658 
5659   /* flexible pointer used in CUSP/CUSPARSE classes */
5660   b->spptr = NULL;
5661 
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5672 #if defined(PETSC_HAVE_MKL_SPARSE)
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5674 #endif
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5677 #if defined(PETSC_HAVE_ELEMENTAL)
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5679 #endif
5680 #if defined(PETSC_HAVE_HYPRE)
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5682 #endif
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5684   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5688 #if defined(PETSC_HAVE_HYPRE)
5689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5690 #endif
5691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5692   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5693   PetscFunctionReturn(0);
5694 }
5695 
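/*
   A minimal sketch (comment only, not compiled) of creating a MATMPIAIJ matrix through the public
   interface; MatSetType() invokes the MatCreate_MPIAIJ() constructor above. M, N and the
   preallocation counts (5 diagonal and 2 off-diagonal nonzeros per row) are illustrative
   placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/
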
5696 /*@C
5697      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5698          and "off-diagonal" part of the matrix in CSR format.
5699 
5700    Collective on MPI_Comm
5701 
5702    Input Parameters:
5703 +  comm - MPI communicator
5704 .  m - number of local rows (Cannot be PETSC_DECIDE)
5705 .  n - This value should be the same as the local size used in creating the
5706        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5707        calculated if N is given). For square matrices n is almost always m.
5708 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5709 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5710 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5711 .   j - column indices
5712 .   a - matrix values
5713 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5714 .   oj - column indices
5715 -   oa - matrix values
5716 
5717    Output Parameter:
5718 .   mat - the matrix
5719 
5720    Level: advanced
5721 
5722    Notes:
5723        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5724        must free the arrays once the matrix has been destroyed and not before.
5725 
5726        The i and j indices are 0 based
5727 
5728        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5729 
5730        This sets local rows and cannot be used to set off-processor values.
5731 
5732        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5733        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5734        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5735        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5736        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5737        communication if it is known that only local entries will be set.
5738 
5739 .keywords: matrix, aij, compressed row, sparse, parallel
5740 
5741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5742           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5743 @*/
5744 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5745 {
5746   PetscErrorCode ierr;
5747   Mat_MPIAIJ     *maij;
5748 
5749   PetscFunctionBegin;
5750   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5751   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5752   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5753   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5754   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5755   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5756   maij = (Mat_MPIAIJ*) (*mat)->data;
5757 
5758   (*mat)->preallocated = PETSC_TRUE;
5759 
5760   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5761   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5762 
5763   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5764   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5765 
5766   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5767   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5768   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5769   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5770 
5771   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5772   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5773   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5774   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5775   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5776   PetscFunctionReturn(0);
5777 }
5778 
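/*
   A small sketch of MatCreateMPIAIJWithSplitArrays() (comment only, not compiled), assuming two
   processes that each own 2 rows and 2 columns of a 4x4 matrix; the arrays shown are the ones rank 0
   would pass. i,j,a give the local "diagonal" block in CSR form with local column indices, and
   oi,oj,oa give the "off-diagonal" block with global column indices. The numerical values are
   illustrative placeholders, and all six arrays must stay valid until the matrix is destroyed.

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};      one entry per row in the 2x2 diagonal block
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {3};        one off-diagonal entry in the first local row
     PetscScalar oa[] = {5.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/
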
5779 /*
5780     Special version for direct calls from Fortran
5781 */
5782 #include <petsc/private/fortranimpl.h>
5783 
5784 /* Change these macros so can be used in void function */
5785 #undef CHKERRQ
5786 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5787 #undef SETERRQ2
5788 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5789 #undef SETERRQ3
5790 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5791 #undef SETERRQ
5792 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5793 
5794 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5795 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5796 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5797 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5798 #else
5799 #endif
5800 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5801 {
5802   Mat            mat  = *mmat;
5803   PetscInt       m    = *mm, n = *mn;
5804   InsertMode     addv = *maddv;
5805   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5806   PetscScalar    value;
5807   PetscErrorCode ierr;
5808 
5809   MatCheckPreallocated(mat,1);
5810   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5811 
5812 #if defined(PETSC_USE_DEBUG)
5813   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5814 #endif
5815   {
5816     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5817     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5818     PetscBool roworiented = aij->roworiented;
5819 
5820     /* Some Variables required in the macro */
5821     Mat        A                 = aij->A;
5822     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5823     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5824     MatScalar  *aa               = a->a;
5825     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5826     Mat        B                 = aij->B;
5827     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5828     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5829     MatScalar  *ba               = b->a;
5830 
5831     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5832     PetscInt  nonew = a->nonew;
5833     MatScalar *ap1,*ap2;
5834 
5835     PetscFunctionBegin;
5836     for (i=0; i<m; i++) {
5837       if (im[i] < 0) continue;
5838 #if defined(PETSC_USE_DEBUG)
5839       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5840 #endif
5841       if (im[i] >= rstart && im[i] < rend) {
5842         row      = im[i] - rstart;
5843         lastcol1 = -1;
5844         rp1      = aj + ai[row];
5845         ap1      = aa + ai[row];
5846         rmax1    = aimax[row];
5847         nrow1    = ailen[row];
5848         low1     = 0;
5849         high1    = nrow1;
5850         lastcol2 = -1;
5851         rp2      = bj + bi[row];
5852         ap2      = ba + bi[row];
5853         rmax2    = bimax[row];
5854         nrow2    = bilen[row];
5855         low2     = 0;
5856         high2    = nrow2;
5857 
5858         for (j=0; j<n; j++) {
5859           if (roworiented) value = v[i*n+j];
5860           else value = v[i+j*m];
5861           if (in[j] >= cstart && in[j] < cend) {
5862             col = in[j] - cstart;
5863             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5864             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5865           } else if (in[j] < 0) continue;
5866 #if defined(PETSC_USE_DEBUG)
5867           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5868           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5869 #endif
5870           else {
5871             if (mat->was_assembled) {
5872               if (!aij->colmap) {
5873                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5874               }
5875 #if defined(PETSC_USE_CTABLE)
5876               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5877               col--;
5878 #else
5879               col = aij->colmap[in[j]] - 1;
5880 #endif
5881               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5882               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5883                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5884                 col  =  in[j];
5885                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5886                 B     = aij->B;
5887                 b     = (Mat_SeqAIJ*)B->data;
5888                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5889                 rp2   = bj + bi[row];
5890                 ap2   = ba + bi[row];
5891                 rmax2 = bimax[row];
5892                 nrow2 = bilen[row];
5893                 low2  = 0;
5894                 high2 = nrow2;
5895                 bm    = aij->B->rmap->n;
5896                 ba    = b->a;
5897               }
5898             } else col = in[j];
5899             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5900           }
5901         }
5902       } else if (!aij->donotstash) {
5903         if (roworiented) {
5904           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5905         } else {
5906           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5907         }
5908       }
5909     }
5910   }
5911   PetscFunctionReturnVoid();
5912 }
5913