xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e5a36eccef3d6b83a2c625c30d0dfd5adb4001f2)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
24    switches over to using inodes when enough exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
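
/*
   A minimal usage sketch, assuming global sizes M and N are already defined; the per-row
   counts 5 and 2 are placeholder preallocation values:

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         takes effect on one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  takes effect on several processes

   Calling both is harmless: whichever routine does not match the communicator size is ignored.
*/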
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
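
/*
   A minimal sketch of runtime type selection, assuming M and N are defined; with
   MatSetFromOptions() the CRL variant is chosen by running with -mat_type aijcrl:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/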
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j<nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
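
/*
   A small sketch of the public entry point whose MPI implementation is above; every rank
   must pass an array sized to the global number of columns, and all ranks receive the
   same result because of the final Allreduce:

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/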
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
218   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the number of diagonal and off-diagonal nonzeros in each row */
283       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the number of diagonal and off-diagonal nonzeros in each row */
316       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
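
/*
   A hedged usage sketch for MatDistribute_MPIAIJ(): rank 0 owns a square sequential
   matrix gA, each rank asks for m local rows (m chosen by the caller), and on reuse
   only the numerical values move:

     Mat gA;   sequential input matrix, significant on rank 0 only
     Mat A;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ...update the numerical values of gA on rank 0...
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/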
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each process
407 holds an order-N integer array), though access is fast.
408 */
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
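
/*
   Lookup sketch matching how the colmap built above is consumed later in this file
   (gcol is a hypothetical global column index); entries are stored shifted by one so
   that zero can mean "not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                            yields -1 when gcol is absent
   #else
     lcol = aij->colmap[gcol] - 1;      likewise -1 when absent
   #endif
*/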
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) ap1[_i] += value;   \
444           else                    ap1[_i] = value; \
445           goto a_noinsert; \
446         } \
447       }  \
448       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
449       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
450       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
451       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
452       N = nrow1++ - 1; a->nz++; high1++; \
453       /* shift up all the later entries in this row */ \
454       for (ii=N; ii>=_i; ii--) { \
455         rp1[ii+1] = rp1[ii]; \
456         ap1[ii+1] = ap1[ii]; \
457       } \
458       rp1[_i] = col;  \
459       ap1[_i] = value;  \
460       A->nonzerostate++;\
461       a_noinsert: ; \
462       ailen[row] = nrow1; \
463 }
464 
465 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
466   { \
467     if (col <= lastcol2) low2 = 0;                        \
468     else high2 = nrow2;                                   \
469     lastcol2 = col;                                       \
470     while (high2-low2 > 5) {                              \
471       t = (low2+high2)/2;                                 \
472       if (rp2[t] > col) high2 = t;                        \
473       else             low2  = t;                         \
474     }                                                     \
475     for (_i=low2; _i<high2; _i++) {                       \
476       if (rp2[_i] > col) break;                           \
477       if (rp2[_i] == col) {                               \
478         if (addv == ADD_VALUES) ap2[_i] += value;         \
479         else                    ap2[_i] = value;          \
480         goto b_noinsert;                                  \
481       }                                                   \
482     }                                                     \
483     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
484     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
485     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
486     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
487     N = nrow2++ - 1; b->nz++; high2++;                    \
488     /* shift up all the later entries in this row */      \
489     for (ii=N; ii>=_i; ii--) {                            \
490       rp2[ii+1] = rp2[ii];                                \
491       ap2[ii+1] = ap2[ii];                                \
492     }                                                     \
493     rp2[_i] = col;                                        \
494     ap2[_i] = value;                                      \
495     B->nonzerostate++;                                    \
496     b_noinsert: ;                                         \
497     bilen[row] = nrow2;                                   \
498   }
499 
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
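  /* v must hold the entire local row ordered by global column:
     [off-diagonal entries left of the diagonal block | diagonal block | off-diagonal entries to its right] */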
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
527 {
528   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
529   PetscScalar    value;
530   PetscErrorCode ierr;
531   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
532   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
533   PetscBool      roworiented = aij->roworiented;
534 
535   /* Some Variables required in the macro */
536   Mat        A                 = aij->A;
537   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
538   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
539   MatScalar  *aa               = a->a;
540   PetscBool  ignorezeroentries = a->ignorezeroentries;
541   Mat        B                 = aij->B;
542   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
543   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
544   MatScalar  *ba               = b->a;
545 
546   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
547   PetscInt  nonew;
548   MatScalar *ap1,*ap2;
549 
550   PetscFunctionBegin;
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = aj + ai[row];
560       ap1      = aa + ai[row];
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = bj + bi[row];
567       ap2      = ba + bi[row];
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j=0; j<n; j++) {
574         if (roworiented) value = v[i*n+j];
575         else             value = v[i+j*m];
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
580           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!aij->colmap) {
588               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 #if defined(PETSC_USE_CTABLE)
591             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
592             col--;
593 #else
594             col = aij->colmap[in[j]] - 1;
595 #endif
596             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
597               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
598               col  =  in[j];
599               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
600               B     = aij->B;
601               b     = (Mat_SeqAIJ*)B->data;
602               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
603               rp2   = bj + bi[row];
604               ap2   = ba + bi[row];
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0) {
612               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
613                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
614               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
619         }
620       }
621     } else {
622       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
638     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
639     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
640 */
641 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
642 {
643   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
644   Mat            A           = aij->A; /* diagonal part of the matrix */
645   Mat            B           = aij->B; /* offdiagonal part of the matrix */
646   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
647   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
648   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
649   PetscInt       *ailen      = a->ilen,*aj = a->j;
650   PetscInt       *bilen      = b->ilen,*bj = b->j;
651   PetscInt       am          = aij->A->rmap->n,j;
652   PetscInt       diag_so_far = 0,dnz;
653   PetscInt       offd_so_far = 0,onz;
654 
655   PetscFunctionBegin;
656   /* Iterate over all rows of the matrix */
657   for (j=0; j<am; j++) {
658     dnz = onz = 0;
659     /*  Iterate over all non-zero columns of the current row */
660     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
661       /* If column is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         aj[diag_so_far++] = mat_j[col] - cstart;
664         dnz++;
665       } else { /* off-diagonal entries */
666         bj[offd_so_far++] = mat_j[col];
667         onz++;
668       }
669     }
670     ailen[j] = dnz;
671     bilen[j] = onz;
672   }
673   PetscFunctionReturn(0);
674 }
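
/*
   A worked illustration with made-up numbers: if cstart=4 and cend=8 and a row has global
   columns {1,4,7,9}, the loop above writes the local columns {0,3} (that is, {4,7}-cstart)
   into aj with ailen = 2, and the untranslated global columns {1,9} into bj with bilen = 2.
*/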
675 
676 /*
677     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
678     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
679     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
680     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
681     would not hold and the more complex MatSetValues_MPIAIJ would have to be used.
682 */
683 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
684 {
685   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
686   Mat            A      = aij->A; /* diagonal part of the matrix */
687   Mat            B      = aij->B; /* offdiagonal part of the matrix */
688   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
689   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
690   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
691   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
692   PetscInt       *ailen = a->ilen,*aj = a->j;
693   PetscInt       *bilen = b->ilen,*bj = b->j;
694   PetscInt       am     = aij->A->rmap->n,j;
695   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
696   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
697   PetscScalar    *aa = a->a,*ba = b->a;
698 
699   PetscFunctionBegin;
700   /* Iterate over all rows of the matrix */
701   for (j=0; j<am; j++) {
702     dnz_row = onz_row = 0;
703     rowstart_offd = full_offd_i[j];
704     rowstart_diag = full_diag_i[j];
705     /*  Iterate over all non-zero columns of the current row */
706     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
707       /* If column is in the diagonal */
708       if (mat_j[col] >= cstart && mat_j[col] < cend) {
709         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
710         aa[rowstart_diag+dnz_row] = mat_a[col];
711         dnz_row++;
712       } else { /* off-diagonal entries */
713         bj[rowstart_offd+onz_row] = mat_j[col];
714         ba[rowstart_offd+onz_row] = mat_a[col];
715         onz_row++;
716       }
717     }
718     ailen[j] = dnz_row;
719     bilen[j] = onz_row;
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
725 {
726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
727   PetscErrorCode ierr;
728   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
729   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
730 
731   PetscFunctionBegin;
732   for (i=0; i<m; i++) {
733     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
734     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
735     if (idxm[i] >= rstart && idxm[i] < rend) {
736       row = idxm[i] - rstart;
737       for (j=0; j<n; j++) {
738         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
739         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
740         if (idxn[j] >= cstart && idxn[j] < cend) {
741           col  = idxn[j] - cstart;
742           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
743         } else {
744           if (!aij->colmap) {
745             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
746           }
747 #if defined(PETSC_USE_CTABLE)
748           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
749           col--;
750 #else
751           col = aij->colmap[idxn[j]] - 1;
752 #endif
753           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
754           else {
755             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
756           }
757         }
758       }
759     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
765 
766 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
767 {
768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
769   PetscErrorCode ierr;
770   PetscInt       nstash,reallocs;
771 
772   PetscFunctionBegin;
773   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
774 
775   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
776   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
777   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
782 {
783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
784   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
785   PetscErrorCode ierr;
786   PetscMPIInt    n;
787   PetscInt       i,j,rstart,ncols,flg;
788   PetscInt       *row,*col;
789   PetscBool      other_disassembled;
790   PetscScalar    *val;
791 
792   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
793 
794   PetscFunctionBegin;
795   if (!aij->donotstash && !mat->nooffprocentries) {
796     while (1) {
797       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
798       if (!flg) break;
799 
800       for (i=0; i<n; ) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         /* Now assemble all these values with a single function call */
808         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
809 
810         i = j;
811       }
812     }
813     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
814   }
815   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
816   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
817 
818   /* determine if any processor has disassembled; if so, we must
819      also disassemble ourselves, in order that we may reassemble. */
820   /*
821      if nonzero structure of submatrix B cannot change then we know that
822      no processor disassembled thus we can skip this stuff
823   */
824   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
825     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
826     if (mat->was_assembled && !other_disassembled) {
827       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
828     }
829   }
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
832   }
833   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
834   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
835   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
836 
837   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
838 
839   aij->rowvalues = 0;
840 
841   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
842   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
843 
844   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
845   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
846     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
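
/*
   The two routines above implement the communication behind the usual public sequence
   (a minimal sketch; ranks may set rows they do not own unless MAT_NO_OFF_PROC_ENTRIES is set):

     ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);  off-process entries go to the stash
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);         stashed entries are sent to their owners
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           received entries are inserted locally
*/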
851 
852 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
859   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
864 {
865   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
866   PetscObjectState sA, sB;
867   PetscInt        *lrows;
868   PetscInt         r, len;
869   PetscBool        cong, lch, gch;
870   PetscErrorCode   ierr;
871 
872   PetscFunctionBegin;
873   /* get locally owned rows */
874   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
875   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
876   /* fix right hand side if needed */
877   if (x && b) {
878     const PetscScalar *xx;
879     PetscScalar       *bb;
880 
881     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
882     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
883     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
884     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
885     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
886     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
887   }
888 
889   sA = mat->A->nonzerostate;
890   sB = mat->B->nonzerostate;
891 
892   if (diag != 0.0 && cong) {
893     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
894     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
895   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
896     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
897     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
898     PetscInt   nnwA, nnwB;
899     PetscBool  nnzA, nnzB;
900 
901     nnwA = aijA->nonew;
902     nnwB = aijB->nonew;
903     nnzA = aijA->keepnonzeropattern;
904     nnzB = aijB->keepnonzeropattern;
905     if (!nnzA) {
906       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
907       aijA->nonew = 0;
908     }
909     if (!nnzB) {
910       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
911       aijB->nonew = 0;
912     }
913     /* Must zero here before the next loop */
914     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
915     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) {
917       const PetscInt row = lrows[r] + A->rmap->rstart;
918       if (row >= A->cmap->N) continue;
919       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
920     }
921     aijA->nonew = nnwA;
922     aijB->nonew = nnwB;
923   } else {
924     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
925     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
926   }
927   ierr = PetscFree(lrows);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
930 
931   /* reduce nonzerostate */
932   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
933   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
934   if (gch) A->nonzerostate++;
935   PetscFunctionReturn(0);
936 }
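
/*
   A usage sketch for the row zeroing implemented above; rows holds global indices and
   the Vec arguments may be NULL when no right-hand-side fix-up is wanted:

     PetscInt rows[2] = {0,5};    made-up global row numbers
     ierr = MatZeroRows(A,2,rows,1.0,NULL,NULL);CHKERRQ(ierr);   zero the rows, put 1.0 on their diagonal
*/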
937 
938 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
939 {
940   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
941   PetscErrorCode    ierr;
942   PetscMPIInt       n = A->rmap->n;
943   PetscInt          i,j,r,m,p = 0,len = 0;
944   PetscInt          *lrows,*owners = A->rmap->range;
945   PetscSFNode       *rrows;
946   PetscSF           sf;
947   const PetscScalar *xx;
948   PetscScalar       *bb,*mask;
949   Vec               xmask,lmask;
950   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
951   const PetscInt    *aj, *ii,*ridx;
952   PetscScalar       *aa;
953 
954   PetscFunctionBegin;
955   /* Create SF where leaves are input rows and roots are owned rows */
956   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
957   for (r = 0; r < n; ++r) lrows[r] = -1;
958   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
959   for (r = 0; r < N; ++r) {
960     const PetscInt idx   = rows[r];
961     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
962     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
963       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
964     }
965     rrows[r].rank  = p;
966     rrows[r].index = rows[r] - owners[p];
967   }
968   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
969   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
970   /* Collect flags for rows to be zeroed */
971   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
972   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
973   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
974   /* Compress and put in row numbers */
975   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
976   /* zero diagonal part of matrix */
977   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
978   /* handle off diagonal part of matrix */
979   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
980   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
981   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
982   for (i=0; i<len; i++) bb[lrows[i]] = 1;
983   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
984   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
987   if (x && b) { /* this code is buggy when the row and column layout don't match */
988     PetscBool cong;
989 
990     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
991     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
992     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
995     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
996   }
997   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
998   /* remove zeroed rows of off diagonal matrix */
999   ii = aij->i;
1000   for (i=0; i<len; i++) {
1001     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1002   }
1003   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1004   if (aij->compressedrow.use) {
1005     m    = aij->compressedrow.nrows;
1006     ii   = aij->compressedrow.i;
1007     ridx = aij->compressedrow.rindex;
1008     for (i=0; i<m; i++) {
1009       n  = ii[i+1] - ii[i];
1010       aj = aij->j + ii[i];
1011       aa = aij->a + ii[i];
1012 
1013       for (j=0; j<n; j++) {
1014         if (PetscAbsScalar(mask[*aj])) {
1015           if (b) bb[*ridx] -= *aa*xx[*aj];
1016           *aa = 0.0;
1017         }
1018         aa++;
1019         aj++;
1020       }
1021       ridx++;
1022     }
1023   } else { /* do not use compressed row format */
1024     m = l->B->rmap->n;
1025     for (i=0; i<m; i++) {
1026       n  = ii[i+1] - ii[i];
1027       aj = aij->j + ii[i];
1028       aa = aij->a + ii[i];
1029       for (j=0; j<n; j++) {
1030         if (PetscAbsScalar(mask[*aj])) {
1031           if (b) bb[i] -= *aa*xx[*aj];
1032           *aa = 0.0;
1033         }
1034         aa++;
1035         aj++;
1036       }
1037     }
1038   }
1039   if (x && b) {
1040     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1041     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1042   }
1043   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1044   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1045   ierr = PetscFree(lrows);CHKERRQ(ierr);
1046 
1047   /* only change matrix nonzero state if pattern was allowed to be changed */
1048   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1049     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1050     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1056 {
1057   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1058   PetscErrorCode ierr;
1059   PetscInt       nt;
1060   VecScatter     Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1064   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1065 
1066   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1067   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1068   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1074 {
1075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087   VecScatter     Mvctx = a->Mvctx;
1088 
1089   PetscFunctionBegin;
1090   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1091   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1092   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1093   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1094   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   /* do nondiagonal part */
1105   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1106   /* do local part */
1107   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1108   /* add partial results together */
1109   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1110   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1115 {
1116   MPI_Comm       comm;
1117   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1118   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1119   IS             Me,Notme;
1120   PetscErrorCode ierr;
1121   PetscInt       M,N,first,last,*notme,i;
1122   PetscBool      lf;
1123   PetscMPIInt    size;
1124 
1125   PetscFunctionBegin;
1126   /* Easy test: symmetric diagonal block */
1127   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1128   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1129   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1130   if (!*f) PetscFunctionReturn(0);
1131   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1132   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1133   if (size == 1) PetscFunctionReturn(0);
1134 
1135   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1136   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1137   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1138   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1139   for (i=0; i<first; i++) notme[i] = i;
1140   for (i=last; i<M; i++) notme[i-last+first] = i;
1141   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1142   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1143   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1144   Aoff = Aoffs[0];
1145   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1146   Boff = Boffs[0];
1147   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1148   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1149   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1150   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1151   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1152   ierr = PetscFree(notme);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1157 {
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1166 {
1167   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1168   PetscErrorCode ierr;
1169 
1170   PetscFunctionBegin;
1171   /* do nondiagonal part */
1172   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1173   /* do local part */
1174   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1175   /* add partial results together */
1176   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1177   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 /*
1182   This only works correctly for square matrices where the subblock A->A is the
1183    diagonal block
1184 */
1185 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1186 {
1187   PetscErrorCode ierr;
1188   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1189 
1190   PetscFunctionBegin;
1191   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1192   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1193   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1204   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1209 {
1210   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214 #if defined(PETSC_USE_LOG)
1215   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1216 #endif
1217   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1218   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1219   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1220   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1221 #if defined(PETSC_USE_CTABLE)
1222   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1223 #else
1224   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1227   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1228   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1229   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1230   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1231   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1232   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1233 
1234   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1243 #if defined(PETSC_HAVE_ELEMENTAL)
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1245 #endif
1246 #if defined(PETSC_HAVE_HYPRE)
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1249 #endif
1250   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1259   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1260   PetscErrorCode ierr;
1261   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1262   int            fd;
1263   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1264   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1265   PetscScalar    *column_values;
1266   PetscInt       message_count,flowcontrolcount;
1267   FILE           *file;
1268 
1269   PetscFunctionBegin;
1270   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1272   nz   = A->nz + B->nz;
1273   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1274   if (!rank) {
1275     header[0] = MAT_FILE_CLASSID;
1276     header[1] = mat->rmap->N;
1277     header[2] = mat->cmap->N;
1278 
1279     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1280     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     /* get largest number of rows any processor has */
1282     rlen  = mat->rmap->n;
1283     range = mat->rmap->range;
1284     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1285   } else {
1286     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     rlen = mat->rmap->n;
1288   }
1289 
1290   /* load up the local row counts */
1291   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1292   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1293 
1294   /* store the row lengths to the file */
1295   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1296   if (!rank) {
1297     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1298     for (i=1; i<size; i++) {
1299       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1300       rlen = range[i+1] - range[i];
1301       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1303     }
1304     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1305   } else {
1306     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1307     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1308     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1309   }
1310   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1311 
1312   /* load up the local column indices */
1313   nzmax = nz; /* the zeroth processor needs as much space as the largest processor needs */
1314   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1315   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1316   cnt   = 0;
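  /* merge each row in ascending global column order: off-diagonal entries to
     the left of the diagonal block, then the diagonal block (offset by cstart),
     then the remaining off-diagonal entries (garray maps local off-diagonal
     column numbers to global columns) */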
1317   for (i=0; i<mat->rmap->n; i++) {
1318     for (j=B->i[i]; j<B->i[i+1]; j++) {
1319       if ((col = garray[B->j[j]]) > cstart) break;
1320       column_indices[cnt++] = col;
1321     }
1322     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1323     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1324   }
1325   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1326 
1327   /* store the column indices to the file */
1328   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1329   if (!rank) {
1330     MPI_Status status;
1331     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     for (i=1; i<size; i++) {
1333       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1334       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1335       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1336       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1337       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1338     }
1339     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1340   } else {
1341     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1342     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1343     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1347 
1348   /* load up the local column values */
1349   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1350   cnt  = 0;
1351   for (i=0; i<mat->rmap->n; i++) {
1352     for (j=B->i[i]; j<B->i[i+1]; j++) {
1353       if (garray[B->j[j]] > cstart) break;
1354       column_values[cnt++] = B->a[j];
1355     }
1356     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1357     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1358   }
1359   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1360 
1361   /* store the column values to the file */
1362   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1363   if (!rank) {
1364     MPI_Status status;
1365     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1366     for (i=1; i<size; i++) {
1367       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1368       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1369       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1370       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1371       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1372     }
1373     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1374   } else {
1375     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1376     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1379   }
1380   ierr = PetscFree(column_values);CHKERRQ(ierr);
1381 
1382   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1383   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #include <petscdraw.h>
1388 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1389 {
1390   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1391   PetscErrorCode    ierr;
1392   PetscMPIInt       rank = aij->rank,size = aij->size;
1393   PetscBool         isdraw,iascii,isbinary;
1394   PetscViewer       sviewer;
1395   PetscViewerFormat format;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1401   if (iascii) {
1402     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1403     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1404       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1405       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1406       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1407       for (i=0; i<(PetscInt)size; i++) {
1408         nmax = PetscMax(nmax,nz[i]);
1409         nmin = PetscMin(nmin,nz[i]);
1410         navg += nz[i];
1411       }
1412       ierr = PetscFree(nz);CHKERRQ(ierr);
1413       navg = navg/size;
1414       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1415       PetscFunctionReturn(0);
1416     }
1417     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1418     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1419       MatInfo   info;
1420       PetscBool inodes;
1421 
1422       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1423       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1424       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1425       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1426       if (!inodes) {
1427         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1428                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1429       } else {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       }
1433       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1434       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1435       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1436       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1437       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1438       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1440       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1441       PetscFunctionReturn(0);
1442     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1443       PetscInt inodecount,inodelimit,*inodes;
1444       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1445       if (inodes) {
1446         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1447       } else {
1448         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1449       }
1450       PetscFunctionReturn(0);
1451     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1452       PetscFunctionReturn(0);
1453     }
1454   } else if (isbinary) {
1455     if (size == 1) {
1456       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1457       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1458     } else {
1459       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1460     }
1461     PetscFunctionReturn(0);
1462   } else if (iascii && size == 1) {
1463     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1465     PetscFunctionReturn(0);
1466   } else if (isdraw) {
1467     PetscDraw draw;
1468     PetscBool isnull;
1469     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1470     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1471     if (isnull) PetscFunctionReturn(0);
1472   }
1473 
1474   { /* assemble the entire matrix onto first processor */
1475     Mat A = NULL, Av;
1476     IS  isrow,iscol;
1477 
1478     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1479     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1480     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1481     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1482 /*  The commented code uses MatCreateSubMatrices instead */
1483 /*
1484     Mat *AA, A = NULL, Av;
1485     IS  isrow,iscol;
1486 
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1488     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1489     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1490     if (!rank) {
1491        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1492        A    = AA[0];
1493        Av   = AA[0];
1494     }
1495     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1496 */
1497     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1498     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1499     /*
1500        Everyone has to participate in the viewing since the graphics waits are
1501        synchronized across all processes that share the PetscDraw object
1502     */
1503     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1504     if (!rank) {
1505       if (((PetscObject)mat)->name) {
1506         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1507       }
1508       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1509     }
1510     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1511     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1512     ierr = MatDestroy(&A);CHKERRQ(ierr);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1518 {
1519   PetscErrorCode ierr;
1520   PetscBool      iascii,isdraw,issocket,isbinary;
1521 
1522   PetscFunctionBegin;
1523   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1524   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1525   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1527   if (iascii || isdraw || isbinary || issocket) {
1528     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
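/*
   Local SOR for the MPI matrix: each outer iteration scatters the current
   solution into lvec (the ghosted off-process values), forms
       bb1 = bb - B*lvec
   and then runs the requested sweep of the sequential SOR kernel on the
   diagonal block A with bb1 as the right-hand side.  A true parallel SOR,
   coupling rows across processes within a sweep, is not supported.
*/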
1533 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1534 {
1535   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1536   PetscErrorCode ierr;
1537   Vec            bb1 = 0;
1538   PetscBool      hasop;
1539 
1540   PetscFunctionBegin;
1541   if (flag == SOR_APPLY_UPPER) {
1542     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1543     PetscFunctionReturn(0);
1544   }
1545 
1546   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1547     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1548   }
1549 
1550   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1551     if (flag & SOR_ZERO_INITIAL_GUESS) {
1552       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1553       its--;
1554     }
1555 
1556     while (its--) {
1557       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559 
1560       /* update rhs: bb1 = bb - B*x */
1561       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1562       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1563 
1564       /* local sweep */
1565       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1566     }
1567   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1568     if (flag & SOR_ZERO_INITIAL_GUESS) {
1569       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1570       its--;
1571     }
1572     while (its--) {
1573       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576       /* update rhs: bb1 = bb - B*x */
1577       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1578       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1579 
1580       /* local sweep */
1581       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1582     }
1583   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1584     if (flag & SOR_ZERO_INITIAL_GUESS) {
1585       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1586       its--;
1587     }
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_EISENSTAT) {
1600     Vec xx1;
1601 
1602     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1603     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1604 
1605     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     if (!mat->diag) {
1608       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1609       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1610     }
1611     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1612     if (hasop) {
1613       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1614     } else {
1615       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1616     }
1617     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1618 
1619     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1620 
1621     /* local sweep */
1622     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1623     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1624     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1625   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1626 
1627   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1628 
1629   matin->factorerrortype = mat->A->factorerrortype;
1630   PetscFunctionReturn(0);
1631 }
1632 
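/*
   MatPermute for MPIAIJ proceeds in four stages, all expressed with PetscSF:
   1) invert the row permutation so each process learns the destination row
      (rdest) of every row it owns, 2) invert the column permutation likewise
      (cdest), 3) broadcast destinations for the ghost columns in garray
      (gcdest), and 4) count diagonal/off-diagonal nonzeros at the destination
      to preallocate before inserting the permuted values with MatSetValues.
*/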
1633 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1634 {
1635   Mat            aA,aB,Aperm;
1636   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1637   PetscScalar    *aa,*ba;
1638   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1639   PetscSF        rowsf,sf;
1640   IS             parcolp = NULL;
1641   PetscBool      done;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1646   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1647   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1648   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1649 
1650   /* Invert row permutation to find out where my rows should go */
1651   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1652   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1653   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1655   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1656   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1657 
1658   /* Invert column permutation to find out where my columns should go */
1659   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1660   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1661   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1662   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1663   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1665   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1666 
1667   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1668   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1669   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1670 
1671   /* Find out where my gcols should go */
1672   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1673   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1675   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1676   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1677   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1678   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1679   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1680 
1681   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1682   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1683   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1684   for (i=0; i<m; i++) {
1685     PetscInt row = rdest[i],rowner;
1686     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1687     for (j=ai[i]; j<ai[i+1]; j++) {
1688       PetscInt cowner,col = cdest[aj[j]];
1689       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1690       if (rowner == cowner) dnnz[i]++;
1691       else onnz[i]++;
1692     }
1693     for (j=bi[i]; j<bi[i+1]; j++) {
1694       PetscInt cowner,col = gcdest[bj[j]];
1695       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1696       if (rowner == cowner) dnnz[i]++;
1697       else onnz[i]++;
1698     }
1699   }
1700   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1701   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1702   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1703   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1704   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1705 
1706   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1707   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1708   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1709   for (i=0; i<m; i++) {
1710     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1711     PetscInt j0,rowlen;
1712     rowlen = ai[i+1] - ai[i];
1713     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch array length m, so insert in batches */
1714       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1715       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1716     }
1717     rowlen = bi[i+1] - bi[i];
1718     for (j0=j=0; j<rowlen; j0=j) {
1719       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1720       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1721     }
1722   }
1723   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1726   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1727   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1728   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1729   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1730   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1731   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1732   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1733   *B = Aperm;
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1738 {
1739   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1744   if (ghosts) *ghosts = aij->garray;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1749 {
1750   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1751   Mat            A    = mat->A,B = mat->B;
1752   PetscErrorCode ierr;
1753   PetscReal      isend[5],irecv[5];
1754 
1755   PetscFunctionBegin;
1756   info->block_size = 1.0;
1757   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1758 
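  /* pack {nz_used, nz_allocated, nz_unneeded, memory, mallocs} of the diagonal
     block, then add the off-diagonal block's below, so that the MAT_GLOBAL_MAX
     and MAT_GLOBAL_SUM flavors need only a single 5-wide MPI reduction */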
1759   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1760   isend[3] = info->memory;  isend[4] = info->mallocs;
1761 
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763 
1764   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1765   isend[3] += info->memory;  isend[4] += info->mallocs;
1766   if (flag == MAT_LOCAL) {
1767     info->nz_used      = isend[0];
1768     info->nz_allocated = isend[1];
1769     info->nz_unneeded  = isend[2];
1770     info->memory       = isend[3];
1771     info->mallocs      = isend[4];
1772   } else if (flag == MAT_GLOBAL_MAX) {
1773     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1774 
1775     info->nz_used      = irecv[0];
1776     info->nz_allocated = irecv[1];
1777     info->nz_unneeded  = irecv[2];
1778     info->memory       = irecv[3];
1779     info->mallocs      = irecv[4];
1780   } else if (flag == MAT_GLOBAL_SUM) {
1781     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1782 
1783     info->nz_used      = irecv[0];
1784     info->nz_allocated = irecv[1];
1785     info->nz_unneeded  = irecv[2];
1786     info->memory       = irecv[3];
1787     info->mallocs      = irecv[4];
1788   }
1789   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1790   info->fill_ratio_needed = 0;
1791   info->factor_mallocs    = 0;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     MatCheckPreallocated(A,1);
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_ROW_ORIENTED:
1814     MatCheckPreallocated(A,1);
1815     a->roworiented = flg;
1816 
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_NEW_DIAGONALS:
1821     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1822     break;
1823   case MAT_IGNORE_OFF_PROC_ENTRIES:
1824     a->donotstash = flg;
1825     break;
1826   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1827   case MAT_SPD:
1828   case MAT_SYMMETRIC:
1829   case MAT_STRUCTURALLY_SYMMETRIC:
1830   case MAT_HERMITIAN:
1831   case MAT_SYMMETRY_ETERNAL:
1832     break;
1833   case MAT_SUBMAT_SINGLEIS:
1834     A->submat_singleis = flg;
1835     break;
1836   case MAT_STRUCTURE_ONLY:
1837     /* The option is handled directly by MatSetOption() */
1838     break;
1839   default:
1840     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1848   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1849   PetscErrorCode ierr;
1850   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1851   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1852   PetscInt       *cmap,*idx_p;
1853 
1854   PetscFunctionBegin;
1855   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1856   mat->getrowactive = PETSC_TRUE;
1857 
1858   if (!mat->rowvalues && (idx || v)) {
1859     /*
1860         allocate enough space to hold information from the longest row.
1861     */
1862     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1863     PetscInt   max = 1,tmp;
1864     for (i=0; i<matin->rmap->n; i++) {
1865       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1866       if (max < tmp) max = tmp;
1867     }
1868     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1869   }
1870 
1871   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1872   lrow = row - rstart;
1873 
1874   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1875   if (!v)   {pvA = 0; pvB = 0;}
1876   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1877   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   nztot = nzA + nzB;
1880 
1881   cmap = mat->garray;
1882   if (v  || idx) {
1883     if (nztot) {
1884       /* Sort by increasing column numbers, assuming A and B already sorted */
1885       PetscInt imark = -1;
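      /* imark splits the off-diagonal entries at the diagonal block: entries
         0..imark-1 of B have global column < cstart, the rest lie to its right */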
1886       if (v) {
1887         *v = v_p = mat->rowvalues;
1888         for (i=0; i<nzB; i++) {
1889           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1890           else break;
1891         }
1892         imark = i;
1893         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1894         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1895       }
1896       if (idx) {
1897         *idx = idx_p = mat->rowindices;
1898         if (imark > -1) {
1899           for (i=0; i<imark; i++) {
1900             idx_p[i] = cmap[cworkB[i]];
1901           }
1902         } else {
1903           for (i=0; i<nzB; i++) {
1904             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1905             else break;
1906           }
1907           imark = i;
1908         }
1909         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1910         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1911       }
1912     } else {
1913       if (idx) *idx = 0;
1914       if (v)   *v   = 0;
1915     }
1916   }
1917   *nz  = nztot;
1918   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1919   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1924 {
1925   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1926 
1927   PetscFunctionBegin;
1928   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1929   aij->getrowactive = PETSC_FALSE;
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1934 {
1935   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1936   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1937   PetscErrorCode ierr;
1938   PetscInt       i,j,cstart = mat->cmap->rstart;
1939   PetscReal      sum = 0.0;
1940   MatScalar      *v;
1941 
1942   PetscFunctionBegin;
1943   if (aij->size == 1) {
1944     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1945   } else {
1946     if (type == NORM_FROBENIUS) {
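      /* ||A||_F = sqrt(sum_ij |a_ij|^2): accumulate |.|^2 over both local blocks, then reduce the sum */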
1947       v = amat->a;
1948       for (i=0; i<amat->nz; i++) {
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950       }
1951       v = bmat->a;
1952       for (i=0; i<bmat->nz; i++) {
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954       }
1955       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1956       *norm = PetscSqrtReal(*norm);
1957       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1958     } else if (type == NORM_1) { /* max column norm */
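      /* ||A||_1 = max_j sum_i |a_ij|: accumulate per-column absolute sums in tmp[], reduce over all ranks, take the max */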
1959       PetscReal *tmp,*tmp2;
1960       PetscInt  *jj,*garray = aij->garray;
1961       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1962       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1963       *norm = 0.0;
1964       v     = amat->a; jj = amat->j;
1965       for (j=0; j<amat->nz; j++) {
1966         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1967       }
1968       v = bmat->a; jj = bmat->j;
1969       for (j=0; j<bmat->nz; j++) {
1970         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1971       }
1972       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1973       for (j=0; j<mat->cmap->N; j++) {
1974         if (tmp2[j] > *norm) *norm = tmp2[j];
1975       }
1976       ierr = PetscFree(tmp);CHKERRQ(ierr);
1977       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1978       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1979     } else if (type == NORM_INFINITY) { /* max row norm */
1980       PetscReal ntemp = 0.0;
1981       for (j=0; j<aij->A->rmap->n; j++) {
1982         v   = amat->a + amat->i[j];
1983         sum = 0.0;
1984         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1985           sum += PetscAbsScalar(*v); v++;
1986         }
1987         v = bmat->a + bmat->i[j];
1988         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1989           sum += PetscAbsScalar(*v); v++;
1990         }
1991         if (sum > ntemp) ntemp = sum;
1992       }
1993       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1994       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1995     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2001 {
2002   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2003   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2004   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2005   PetscErrorCode ierr;
2006   Mat            B,A_diag,*B_diag;
2007   MatScalar      *array;
2008 
2009   PetscFunctionBegin;
2010   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2011   ai = Aloc->i; aj = Aloc->j;
2012   bi = Bloc->i; bj = Bloc->j;
2013   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2014     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2015     PetscSFNode          *oloc;
2016     PETSC_UNUSED PetscSF sf;
2017 
2018     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2019     /* compute d_nnz for preallocation */
2020     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++) {
2022       d_nnz[aj[i]]++;
2023     }
2024     /* compute local off-diagonal contributions */
2025     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2026     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2027     /* map those to global */
2028     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2029     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2030     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2031     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2032     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2034     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2035 
2036     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2037     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2038     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2039     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2040     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2041     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2042   } else {
2043     B    = *matout;
2044     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2045   }
2046 
2047   b           = (Mat_MPIAIJ*)B->data;
2048   A_diag      = a->A;
2049   B_diag      = &b->A;
2050   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2051   A_diag_ncol = A_diag->cmap->N;
2052   B_diag_ilen = sub_B_diag->ilen;
2053   B_diag_i    = sub_B_diag->i;
2054 
2055   /* Set ilen for diagonal of B */
2056   for (i=0; i<A_diag_ncol; i++) {
2057     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2058   }
2059 
2060   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2061      very quickly (i.e., without using MatSetValues), because all writes are local. */
2062   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2063 
2064   /* copy over the B part */
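  /* each local row rstart+i of the off-diagonal block becomes column rstart+i
     of the transpose: insert it as ncol rows (the global columns from garray)
     by one column, letting MatSetValues route the mostly off-process entries */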
2065   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row   = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++;
2074     array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2089 {
2090   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2091   Mat            a    = aij->A,b = aij->B;
2092   PetscErrorCode ierr;
2093   PetscInt       s1,s2,s3;
2094 
2095   PetscFunctionBegin;
2096   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2097   if (rr) {
2098     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2099     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2100     /* Overlap communication with computation. */
2101     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2102   }
2103   if (ll) {
2104     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2105     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2106     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2107   }
2108   /* scale  the diagonal block */
2109   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2110 
2111   if (rr) {
2112     /* Do a scatter end and then right scale the off-diagonal block */
2113     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2115   }
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2120 {
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   PetscErrorCode ierr;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2130 {
2131   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2132   Mat            a,b,c,d;
2133   PetscBool      flg;
2134   PetscErrorCode ierr;
2135 
2136   PetscFunctionBegin;
2137   a = matA->A; b = matA->B;
2138   c = matB->A; d = matB->B;
2139 
2140   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2141   if (flg) {
2142     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2143   }
2144   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2149 {
2150   PetscErrorCode ierr;
2151   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2152   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2153 
2154   PetscFunctionBegin;
2155   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2156   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2157     /* because of the column compression in the off-processor part of the matrix a->B,
2158        the number of columns in a->B and b->B may be different, hence we cannot call
2159        the MatCopy() directly on the two parts. If need be, we can provide a more
2160        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2161        then copying the submatrices */
2162     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2163   } else {
2164     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2165     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2166   }
2167   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2172 {
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /*
2181    Computes the number of nonzeros per row needed for preallocation when X and Y
2182    have different nonzero structure.
2183 */
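/*
   Example: if row i of X has global columns {0,3,5} and row i of Y has
   global columns {3,4}, the merged pattern is {0,3,4,5}, so nnz[i] = 4;
   the loop below merges the two sorted column lists and counts the shared
   column 3 only once.
*/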
2184 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2185 {
2186   PetscInt       i,j,k,nzx,nzy;
2187 
2188   PetscFunctionBegin;
2189   /* Set the number of nonzeros in the new matrix */
2190   for (i=0; i<m; i++) {
2191     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2192     nzx = xi[i+1] - xi[i];
2193     nzy = yi[i+1] - yi[i];
2194     nnz[i] = 0;
2195     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2196       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2197       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2198       nnz[i]++;
2199     }
2200     for (; k<nzy; k++) nnz[i]++;
2201   }
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2206 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       m = Y->rmap->N;
2210   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2211   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2212 
2213   PetscFunctionBegin;
2214   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2219 {
2220   PetscErrorCode ierr;
2221   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2222   PetscBLASInt   bnz,one=1;
2223   Mat_SeqAIJ     *x,*y;
2224 
2225   PetscFunctionBegin;
2226   if (str == SAME_NONZERO_PATTERN) {
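    /* identical sparsity: add the raw value arrays of both local blocks with BLAS axpy, y->a += a * x->a */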
2227     PetscScalar alpha = a;
2228     x    = (Mat_SeqAIJ*)xx->A->data;
2229     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2230     y    = (Mat_SeqAIJ*)yy->A->data;
2231     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2232     x    = (Mat_SeqAIJ*)xx->B->data;
2233     y    = (Mat_SeqAIJ*)yy->B->data;
2234     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2235     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2236     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2237   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2238     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2239   } else {
2240     Mat      B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2263 {
2264 #if defined(PETSC_USE_COMPLEX)
2265   PetscErrorCode ierr;
2266   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2270   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2271 #else
2272   PetscFunctionBegin;
2273 #endif
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2278 {
2279   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2284   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2289 {
2290   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2291   PetscErrorCode ierr;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2295   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2302   PetscErrorCode ierr;
2303   PetscInt       i,*idxb = 0;
2304   PetscScalar    *va,*vb;
2305   Vec            vtmp;
2306 
2307   PetscFunctionBegin;
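  /* take the row-wise max of |entry| over the diagonal block first, then keep
     whichever block wins per row; winning indices are made global with the
     cstart offset for A and the garray lookup for B */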
2308   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2309   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2310   if (idx) {
2311     for (i=0; i<A->rmap->n; i++) {
2312       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2313     }
2314   }
2315 
2316   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2317   if (idx) {
2318     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2319   }
2320   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2321   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2322 
2323   for (i=0; i<A->rmap->n; i++) {
2324     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2325       va[i] = vb[i];
2326       if (idx) idx[i] = a->garray[idxb[i]];
2327     }
2328   }
2329 
2330   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2332   ierr = PetscFree(idxb);CHKERRQ(ierr);
2333   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,*idxb = 0;
2342   PetscScalar    *va,*vb;
2343   Vec            vtmp;
2344 
2345   PetscFunctionBegin;
2346   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2347   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2348   if (idx) {
2349     for (i=0; i<A->rmap->n; i++) {
2350       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2351     }
2352   }
2353 
2354   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2355   if (idx) {
2356     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2357   }
2358   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2359   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2360 
2361   for (i=0; i<A->rmap->n; i++) {
2362     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2363       va[i] = vb[i];
2364       if (idx) idx[i] = a->garray[idxb[i]];
2365     }
2366   }
2367 
2368   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2370   ierr = PetscFree(idxb);CHKERRQ(ierr);
2371   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2378   PetscInt       n      = A->rmap->n;
2379   PetscInt       cstart = A->cmap->rstart;
2380   PetscInt       *cmap  = mat->garray;
2381   PetscInt       *diagIdx, *offdiagIdx;
2382   Vec            diagV, offdiagV;
2383   PetscScalar    *a, *diagA, *offdiagA;
2384   PetscInt       r;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2392   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2394   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2395   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < n; ++r) {
2397     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2398       a[r]   = diagA[r];
2399       idx[r] = cstart + diagIdx[r];
2400     } else {
2401       a[r]   = offdiagA[r];
2402       idx[r] = cmap[offdiagIdx[r]];
2403     }
2404   }
2405   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2406   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2407   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2415 {
2416   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2417   PetscInt       n      = A->rmap->n;
2418   PetscInt       cstart = A->cmap->rstart;
2419   PetscInt       *cmap  = mat->garray;
2420   PetscInt       *diagIdx, *offdiagIdx;
2421   Vec            diagV, offdiagV;
2422   PetscScalar    *a, *diagA, *offdiagA;
2423   PetscInt       r;
2424   PetscErrorCode ierr;
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2428   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2429   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2430   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2431   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2432   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2433   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2434   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   for (r = 0; r < n; ++r) {
2436     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2437       a[r]   = diagA[r];
2438       idx[r] = cstart + diagIdx[r];
2439     } else {
2440       a[r]   = offdiagA[r];
2441       idx[r] = cmap[offdiagIdx[r]];
2442     }
2443   }
2444   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2445   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2446   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2454 {
2455   PetscErrorCode ierr;
2456   Mat            *dummy;
2457 
2458   PetscFunctionBegin;
2459   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2460   *newmat = *dummy;
2461   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2466 {
2467   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2472   A->factorerrortype = a->A->factorerrortype;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2477 {
2478   PetscErrorCode ierr;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2480 
2481   PetscFunctionBegin;
2482   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2483   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2484   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2485   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2490 {
2491   PetscFunctionBegin;
2492   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2493   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 /*@
2498    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2499 
2500    Collective on Mat
2501 
2502    Input Parameters:
2503 +    A - the matrix
2504 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2505 
2506    Level: advanced
2507 
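   Example Usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve

.seealso: MatIncreaseOverlap()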
2508 @*/
2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2510 {
2511   PetscErrorCode       ierr;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2519 {
2520   PetscErrorCode       ierr;
2521   PetscBool            sc = PETSC_FALSE,flg;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2525   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2526   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2527   if (flg) {
2528     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2529   }
2530   ierr = PetscOptionsTail();CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2535 {
2536   PetscErrorCode ierr;
2537   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2538   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2539 
2540   PetscFunctionBegin;
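  /* a shift only touches the diagonal, so when nothing has been preallocated a single
     nonzero per local row of the diagonal block is sufficient */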
2541   if (!Y->preallocated) {
2542     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2543   } else if (!aij->nz) {
2544     PetscInt nonew = aij->nonew;
2545     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2546     aij->nonew = nonew;
2547   }
2548   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2553 {
2554   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2555   PetscErrorCode ierr;
2556 
2557   PetscFunctionBegin;
2558   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2559   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2560   if (d) {
2561     PetscInt rstart;
2562     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
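    /* a->A reports a local row index; shift by rstart to return a global row index */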
2563     *d += rstart;
2564 
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2570 {
2571   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2572   PetscErrorCode ierr;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 /* -------------------------------------------------------------------*/
2580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2581                                        MatGetRow_MPIAIJ,
2582                                        MatRestoreRow_MPIAIJ,
2583                                        MatMult_MPIAIJ,
2584                                 /* 4*/ MatMultAdd_MPIAIJ,
2585                                        MatMultTranspose_MPIAIJ,
2586                                        MatMultTransposeAdd_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                        0,
2590                                 /*10*/ 0,
2591                                        0,
2592                                        0,
2593                                        MatSOR_MPIAIJ,
2594                                        MatTranspose_MPIAIJ,
2595                                 /*15*/ MatGetInfo_MPIAIJ,
2596                                        MatEqual_MPIAIJ,
2597                                        MatGetDiagonal_MPIAIJ,
2598                                        MatDiagonalScale_MPIAIJ,
2599                                        MatNorm_MPIAIJ,
2600                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2601                                        MatAssemblyEnd_MPIAIJ,
2602                                        MatSetOption_MPIAIJ,
2603                                        MatZeroEntries_MPIAIJ,
2604                                 /*24*/ MatZeroRows_MPIAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                 /*29*/ MatSetUp_MPIAIJ,
2610                                        0,
2611                                        0,
2612                                        MatGetDiagonalBlock_MPIAIJ,
2613                                        0,
2614                                 /*34*/ MatDuplicate_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*39*/ MatAXPY_MPIAIJ,
2620                                        MatCreateSubMatrices_MPIAIJ,
2621                                        MatIncreaseOverlap_MPIAIJ,
2622                                        MatGetValues_MPIAIJ,
2623                                        MatCopy_MPIAIJ,
2624                                 /*44*/ MatGetRowMax_MPIAIJ,
2625                                        MatScale_MPIAIJ,
2626                                        MatShift_MPIAIJ,
2627                                        MatDiagonalSet_MPIAIJ,
2628                                        MatZeroRowsColumns_MPIAIJ,
2629                                 /*49*/ MatSetRandom_MPIAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2635                                        0,
2636                                        MatSetUnfactored_MPIAIJ,
2637                                        MatPermute_MPIAIJ,
2638                                        0,
2639                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2640                                        MatDestroy_MPIAIJ,
2641                                        MatView_MPIAIJ,
2642                                        0,
2643                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2644                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2645                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2650                                        MatGetRowMinAbs_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*75*/ MatFDColoringApply_AIJ,
2656                                        MatSetFromOptions_MPIAIJ,
2657                                        0,
2658                                        0,
2659                                        MatFindZeroDiagonals_MPIAIJ,
2660                                 /*80*/ 0,
2661                                        0,
2662                                        0,
2663                                 /*83*/ MatLoad_MPIAIJ,
2664                                        MatIsSymmetric_MPIAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2670                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2671                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2672                                        MatPtAP_MPIAIJ_MPIAIJ,
2673                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2674                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                 /*99*/ 0,
2680                                        0,
2681                                        0,
2682                                        MatConjugate_MPIAIJ,
2683                                        0,
2684                                 /*104*/MatSetValuesRow_MPIAIJ,
2685                                        MatRealPart_MPIAIJ,
2686                                        MatImaginaryPart_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                 /*109*/0,
2690                                        0,
2691                                        MatGetRowMin_MPIAIJ,
2692                                        0,
2693                                        MatMissingDiagonal_MPIAIJ,
2694                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2695                                        0,
2696                                        MatGetGhosts_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                 /*119*/0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        MatGetMultiProcBlock_MPIAIJ,
2704                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2705                                        MatGetColumnNorms_MPIAIJ,
2706                                        MatInvertBlockDiagonal_MPIAIJ,
2707                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2708                                        MatCreateSubMatricesMPI_MPIAIJ,
2709                                 /*129*/0,
2710                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2711                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2712                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2713                                        0,
2714                                 /*134*/0,
2715                                        0,
2716                                        MatRARt_MPIAIJ_MPIAIJ,
2717                                        0,
2718                                        0,
2719                                 /*139*/MatSetBlockSizes_MPIAIJ,
2720                                        0,
2721                                        0,
2722                                        MatFDColoringSetUp_MPIXAIJ,
2723                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2724                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2725 };
2726 
2727 /* ----------------------------------------------------------------------------------------*/
2728 
2729 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2741 {
2742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2747   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755 
2756   PetscFunctionBegin;
2757   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2758   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2759   b = (Mat_MPIAIJ*)B->data;
2760 
2761 #if defined(PETSC_USE_CTABLE)
2762   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2763 #else
2764   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2765 #endif
2766   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2767   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2768   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2769 
2770   /* Because B's sizes may have changed, we simply destroy it and create a new one each time */
2771   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2772   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2773   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2774   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2775   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2776   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2777 
2778   if (!B->preallocated) {
2779     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2780     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2781     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2782     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2783     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2784   }
2785 
2786   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2787   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2788   B->preallocated  = PETSC_TRUE;
2789   B->was_assembled = PETSC_FALSE;
2790   B->assembled     = PETSC_FALSE;
2791   PetscFunctionReturn(0);
2792 }
2793 
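/* A minimal call sequence that reaches MatMPIAIJSetPreallocation_MPIAIJ() above (a sketch;
   M,N are global sizes and the nonzero estimates 5 and 2 are illustrative only):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);

   Here 5 bounds the nonzeros per row in the diagonal block and 2 those in the off-diagonal block. */
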
2794 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2795 {
2796   Mat_MPIAIJ     *b;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2801   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2803   b = (Mat_MPIAIJ*)B->data;
2804 
2805 #if defined(PETSC_USE_CTABLE)
2806   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2807 #else
2808   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2809 #endif
2810   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2811   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2812   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2813 
2814   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2815   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2816   B->preallocated  = PETSC_TRUE;
2817   B->was_assembled = PETSC_FALSE;
2818   B->assembled = PETSC_FALSE;
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2823 {
2824   Mat            mat;
2825   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2826   PetscErrorCode ierr;
2827 
2828   PetscFunctionBegin;
2829   *newmat = 0;
2830   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2831   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2832   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2833   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2834   a       = (Mat_MPIAIJ*)mat->data;
2835 
2836   mat->factortype   = matin->factortype;
2837   mat->assembled    = PETSC_TRUE;
2838   mat->insertmode   = NOT_SET_VALUES;
2839   mat->preallocated = PETSC_TRUE;
2840 
2841   a->size         = oldmat->size;
2842   a->rank         = oldmat->rank;
2843   a->donotstash   = oldmat->donotstash;
2844   a->roworiented  = oldmat->roworiented;
2845   a->rowindices   = 0;
2846   a->rowvalues    = 0;
2847   a->getrowactive = PETSC_FALSE;
2848 
2849   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2851 
2852   if (oldmat->colmap) {
2853 #if defined(PETSC_USE_CTABLE)
2854     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2855 #else
2856     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2857     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2858     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2859 #endif
2860   } else a->colmap = 0;
2861   if (oldmat->garray) {
2862     PetscInt len;
2863     len  = oldmat->B->cmap->n;
2864     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2865     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2866     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2867   } else a->garray = 0;
2868 
2869   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2870   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2871   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2872   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2873 
2874   if (oldmat->Mvctx_mpi1) {
2875     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2876     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2877   }
2878 
2879   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2880   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2881   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2882   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2883   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2884   *newmat = mat;
2885   PetscFunctionReturn(0);
2886 }
2887 
2888 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2889 {
2890   PetscBool      isbinary, ishdf5;
2891   PetscErrorCode ierr;
2892 
2893   PetscFunctionBegin;
2894   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2895   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2896   /* force binary viewer to load .info file if it has not yet done so */
2897   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2898   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2899   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2900   if (isbinary) {
2901     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2902   } else if (ishdf5) {
2903 #if defined(PETSC_HAVE_HDF5)
2904     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2905 #else
2906     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2907 #endif
2908   } else {
2909     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2910   }
2911   PetscFunctionReturn(0);
2912 }
2913 
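/* Typical driver-level usage that dispatches to MatLoad_MPIAIJ() (a sketch; "matrix.dat"
   is a placeholder file name):

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/
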
2914 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2915 {
2916   PetscScalar    *vals,*svals;
2917   MPI_Comm       comm;
2918   PetscErrorCode ierr;
2919   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2920   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2921   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2922   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2923   PetscInt       cend,cstart,n,*rowners;
2924   int            fd;
2925   PetscInt       bs = newMat->rmap->bs;
2926 
2927   PetscFunctionBegin;
2928   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2929   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2930   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2931   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2932   if (!rank) {
2933     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2934     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2935     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2936   }
2937 
2938   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2939   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2940   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2941   if (bs < 0) bs = 1;
2942 
2943   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2944   M    = header[1]; N = header[2];
2945 
2946   /* If global sizes are set, check that they are consistent with those given in the file */
2947   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2948   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2949 
2950   /* determine ownership of all (block) rows */
2951   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2952   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2953   else m = newMat->rmap->n; /* Set by user */
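  /* Example of the PETSC_DECIDE branch: M=10 rows, bs=2, size=3 gives 5 block rows;
     ranks 0 and 1 get 2 blocks each (m=4) and rank 2 gets 1 block (m=2) */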
2954 
2955   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2956   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2957 
2958   /* First process needs enough room for process with most rows */
2959   if (!rank) {
2960     mmax = rowners[1];
2961     for (i=2; i<=size; i++) {
2962       mmax = PetscMax(mmax, rowners[i]);
2963     }
2964   } else mmax = -1;             /* unused, but compilers complain */
2965 
2966   rowners[0] = 0;
2967   for (i=2; i<=size; i++) {
2968     rowners[i] += rowners[i-1];
2969   }
2970   rstart = rowners[rank];
2971   rend   = rowners[rank+1];
2972 
2973   /* distribute row lengths to all processors */
2974   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2975   if (!rank) {
2976     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2977     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2978     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2979     for (j=0; j<m; j++) {
2980       procsnz[0] += ourlens[j];
2981     }
2982     for (i=1; i<size; i++) {
2983       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2984       /* calculate the number of nonzeros on each processor */
2985       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2986         procsnz[i] += rowlengths[j];
2987       }
2988       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2989     }
2990     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2991   } else {
2992     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2993   }
2994 
2995   if (!rank) {
2996     /* determine max buffer needed and allocate it */
2997     maxnz = 0;
2998     for (i=0; i<size; i++) {
2999       maxnz = PetscMax(maxnz,procsnz[i]);
3000     }
3001     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3002 
3003     /* read in my part of the matrix column indices  */
3004     nz   = procsnz[0];
3005     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3006     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3007 
3008     /* read in everyone else's and ship them off */
3009     for (i=1; i<size; i++) {
3010       nz   = procsnz[i];
3011       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3012       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(cols);CHKERRQ(ierr);
3015   } else {
3016     /* determine buffer space needed for message */
3017     nz = 0;
3018     for (i=0; i<m; i++) {
3019       nz += ourlens[i];
3020     }
3021     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3022 
3023     /* receive message of column indices */
3024     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3025   }
3026 
3027   /* determine column ownership if matrix is not square */
3028   if (N != M) {
3029     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3030     else n = newMat->cmap->n;
3031     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3032     cstart = cend - n;
3033   } else {
3034     cstart = rstart;
3035     cend   = rend;
3036     n      = cend - cstart;
3037   }
3038 
3039   /* loop over local rows, determining the number of off-diagonal entries */
3040   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3041   jj   = 0;
3042   for (i=0; i<m; i++) {
3043     for (j=0; j<ourlens[i]; j++) {
3044       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3045       jj++;
3046     }
3047   }
3048 
3049   for (i=0; i<m; i++) {
3050     ourlens[i] -= offlens[i];
3051   }
3052   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3053 
3054   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3055 
3056   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3057 
3058   for (i=0; i<m; i++) {
3059     ourlens[i] += offlens[i];
3060   }
3061 
3062   if (!rank) {
3063     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3064 
3065     /* read in my part of the matrix numerical values  */
3066     nz   = procsnz[0];
3067     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3068 
3069     /* insert into matrix */
3070     jj      = rstart;
3071     smycols = mycols;
3072     svals   = vals;
3073     for (i=0; i<m; i++) {
3074       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3075       smycols += ourlens[i];
3076       svals   += ourlens[i];
3077       jj++;
3078     }
3079 
3080     /* read in other processors and ship out */
3081     for (i=1; i<size; i++) {
3082       nz   = procsnz[i];
3083       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3084       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3085     }
3086     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3087   } else {
3088     /* receive numeric values */
3089     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3090 
3091     /* receive message of values */
3092     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3093 
3094     /* insert into matrix */
3095     jj      = rstart;
3096     smycols = mycols;
3097     svals   = vals;
3098     for (i=0; i<m; i++) {
3099       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3100       smycols += ourlens[i];
3101       svals   += ourlens[i];
3102       jj++;
3103     }
3104   }
3105   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3106   ierr = PetscFree(vals);CHKERRQ(ierr);
3107   ierr = PetscFree(mycols);CHKERRQ(ierr);
3108   ierr = PetscFree(rowners);CHKERRQ(ierr);
3109   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3110   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 /* Not scalable because of ISAllGather() unless getting all columns. */
3115 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3116 {
3117   PetscErrorCode ierr;
3118   IS             iscol_local;
3119   PetscBool      isstride;
3120   PetscMPIInt    lisstride=0,gisstride;
3121 
3122   PetscFunctionBegin;
3123   /* check if we are grabbing all columns */
3124   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3125 
3126   if (isstride) {
3127     PetscInt  start,len,mstart,mlen;
3128     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3129     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3130     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3131     if (mstart == start && mlen-mstart == len) lisstride = 1;
3132   }
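  /* MPI_MIN over the 0/1 flags acts as a logical AND: the all-columns fast path below
     is taken only if every process owns a matching stride */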
3133 
3134   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3135   if (gisstride) {
3136     PetscInt N;
3137     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3138     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3139     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3140     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3141   } else {
3142     PetscInt cbs;
3143     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3144     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3145     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3146   }
3147 
3148   *isseq = iscol_local;
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 /*
3153  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and a sequential iscol_local with the global size of iscol
3154  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3155 
3156  Input Parameters:
3157    mat - matrix
3158    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3159            i.e., mat->rstart <= isrow[i] < mat->rend
3160    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3161            i.e., mat->cstart <= iscol[i] < mat->cend
3162  Output Parameters:
3163    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3164    iscol_o - sequential column index set for retrieving mat->B
3165    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3166  */
3167 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3168 {
3169   PetscErrorCode ierr;
3170   Vec            x,cmap;
3171   const PetscInt *is_idx;
3172   PetscScalar    *xarray,*cmaparray;
3173   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3174   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3175   Mat            B=a->B;
3176   Vec            lvec=a->lvec,lcmap;
3177   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3178   MPI_Comm       comm;
3179   VecScatter     Mvctx=a->Mvctx;
3180 
3181   PetscFunctionBegin;
3182   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3183   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3184 
3185   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3186   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3187   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3188   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3189   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
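  /* x carries the global column index at every selected column and -1 elsewhere; after the
     scatter below, lvec entries still equal to -1 mark columns of B that iscol did not select */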
3190 
3191   /* Get start indices */
3192   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3193   isstart -= ncols;
3194   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3195 
3196   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3197   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3198   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3199   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3200   for (i=0; i<ncols; i++) {
3201     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3202     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3203     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3204   }
3205   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3206   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3207   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3208 
3209   /* Get iscol_d */
3210   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3211   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3212   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3213 
3214   /* Get isrow_d */
3215   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3216   rstart = mat->rmap->rstart;
3217   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3218   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3219   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3220   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3221 
3222   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3223   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3224   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3225 
3226   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3227   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229 
3230   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3231 
3232   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3233   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234 
3235   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3236   /* off-process column indices */
3237   count = 0;
3238   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3240 
3241   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3242   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3243   for (i=0; i<Bn; i++) {
3244     if (PetscRealPart(xarray[i]) > -1.0) {
3245       idx[count]     = i;                   /* local column index in off-diagonal part B */
3246       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3247       count++;
3248     }
3249   }
3250   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3251   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3252 
3253   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3254   /* cannot ensure iscol_o has same blocksize as iscol! */
3255 
3256   ierr = PetscFree(idx);CHKERRQ(ierr);
3257   *garray = cmap1;
3258 
3259   ierr = VecDestroy(&x);CHKERRQ(ierr);
3260   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3261   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3266 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3267 {
3268   PetscErrorCode ierr;
3269   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3270   Mat            M = NULL;
3271   MPI_Comm       comm;
3272   IS             iscol_d,isrow_d,iscol_o;
3273   Mat            Asub = NULL,Bsub = NULL;
3274   PetscInt       n;
3275 
3276   PetscFunctionBegin;
3277   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3278 
3279   if (call == MAT_REUSE_MATRIX) {
3280     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3281     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3282     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3283 
3284     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3285     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3286 
3287     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3288     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3289 
3290     /* Update diagonal and off-diagonal portions of submat */
3291     asub = (Mat_MPIAIJ*)(*submat)->data;
3292     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3293     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3294     if (n) {
3295       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3296     }
3297     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299 
3300   } else { /* call == MAT_INITIAL_MATRIX */
3301     const PetscInt *garray;
3302     PetscInt        BsubN;
3303 
3304     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3305     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3306 
3307     /* Create local submatrices Asub and Bsub */
3308     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3309     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3310 
3311     /* Create submatrix M */
3312     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3313 
3314     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3315     asub = (Mat_MPIAIJ*)M->data;
3316 
3317     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3318     n = asub->B->cmap->N;
3319     if (BsubN > n) {
3320       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3321       const PetscInt *idx;
3322       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3323       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3324 
3325       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3326       j = 0;
3327       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3328       for (i=0; i<n; i++) {
3329         if (j >= BsubN) break;
3330         while (subgarray[i] > garray[j]) j++;
3331 
3332         if (subgarray[i] == garray[j]) {
3333           idx_new[i] = idx[j++];
3334         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3335       }
3336       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3337 
3338       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3339       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3340 
3341     } else if (BsubN < n) {
3342       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3343     }
3344 
3345     ierr = PetscFree(garray);CHKERRQ(ierr);
3346     *submat = M;
3347 
3348     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3349     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3350     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3351 
3352     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3353     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3354 
3355     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3356     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3357   }
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3362 {
3363   PetscErrorCode ierr;
3364   IS             iscol_local=NULL,isrow_d;
3365   PetscInt       csize;
3366   PetscInt       n,i,j,start,end;
3367   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3368   MPI_Comm       comm;
3369 
3370   PetscFunctionBegin;
3371   /* If isrow has same processor distribution as mat,
3372      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3373   if (call == MAT_REUSE_MATRIX) {
3374     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3375     if (isrow_d) {
3376       sameRowDist  = PETSC_TRUE;
3377       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3378     } else {
3379       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3380       if (iscol_local) {
3381         sameRowDist  = PETSC_TRUE;
3382         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3383       }
3384     }
3385   } else {
3386     /* Check if isrow has same processor distribution as mat */
3387     sameDist[0] = PETSC_FALSE;
3388     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3389     if (!n) {
3390       sameDist[0] = PETSC_TRUE;
3391     } else {
3392       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3393       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3394       if (i >= start && j < end) {
3395         sameDist[0] = PETSC_TRUE;
3396       }
3397     }
3398 
3399     /* Check if iscol has same processor distribution as mat */
3400     sameDist[1] = PETSC_FALSE;
3401     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3402     if (!n) {
3403       sameDist[1] = PETSC_TRUE;
3404     } else {
3405       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3406       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3407       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3408     }
3409 
3410     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3411     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3412     sameRowDist = tsameDist[0];
3413   }
3414 
3415   if (sameRowDist) {
3416     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3417       /* isrow and iscol have same processor distribution as mat */
3418       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3419       PetscFunctionReturn(0);
3420     } else { /* sameRowDist */
3421       /* isrow has same processor distribution as mat */
3422       if (call == MAT_INITIAL_MATRIX) {
3423         PetscBool sorted;
3424         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3425         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3426         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3427         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3428 
3429         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3430         if (sorted) {
3431           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3432           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3433           PetscFunctionReturn(0);
3434         }
3435       } else { /* call == MAT_REUSE_MATRIX */
3436         IS    iscol_sub;
3437         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3438         if (iscol_sub) {
3439           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3440           PetscFunctionReturn(0);
3441         }
3442       }
3443     }
3444   }
3445 
3446   /* General case: iscol -> iscol_local which has global size of iscol */
3447   if (call == MAT_REUSE_MATRIX) {
3448     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3449     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3450   } else {
3451     if (!iscol_local) {
3452       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3453     }
3454   }
3455 
3456   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3457   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3458 
3459   if (call == MAT_INITIAL_MATRIX) {
3460     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3461     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3462   }
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 /*@C
3467      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3468          and "off-diagonal" part of the matrix in CSR format.
3469 
3470    Collective on MPI_Comm
3471 
3472    Input Parameters:
3473 +  comm - MPI communicator
3474 .  A - "diagonal" portion of matrix
3475 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3476 -  garray - global index of B columns
3477 
3478    Output Parameter:
3479 .   mat - the matrix, with input A as its local diagonal matrix
3480    Level: advanced
3481 
3482    Notes:
3483        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3484        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3485 
3486 .seealso: MatCreateMPIAIJWithSplitArrays()
3487 @*/
3488 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3489 {
3490   PetscErrorCode ierr;
3491   Mat_MPIAIJ     *maij;
3492   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3493   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3494   PetscScalar    *oa=b->a;
3495   Mat            Bnew;
3496   PetscInt       m,n,N;
3497 
3498   PetscFunctionBegin;
3499   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3500   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3501   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3502   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3503   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3504   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3505 
3506   /* Get global columns of mat */
3507   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3508 
3509   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3510   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3511   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3512   maij = (Mat_MPIAIJ*)(*mat)->data;
3513 
3514   (*mat)->preallocated = PETSC_TRUE;
3515 
3516   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3517   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3518 
3519   /* Set A as diagonal portion of *mat */
3520   maij->A = A;
3521 
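  /* convert B's column indices from its local numbering to global numbering via garray,
     so Bnew below can be created with the full global column dimension N */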
3522   nz = oi[m];
3523   for (i=0; i<nz; i++) {
3524     col   = oj[i];
3525     oj[i] = garray[col];
3526   }
3527 
3528   /* Set Bnew as off-diagonal portion of *mat */
3529   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3530   bnew        = (Mat_SeqAIJ*)Bnew->data;
3531   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3532   maij->B     = Bnew;
3533 
3534   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3535 
3536   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3537   b->free_a       = PETSC_FALSE;
3538   b->free_ij      = PETSC_FALSE;
3539   ierr = MatDestroy(&B);CHKERRQ(ierr);
3540 
3541   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3542   bnew->free_a       = PETSC_TRUE;
3543   bnew->free_ij      = PETSC_TRUE;
3544 
3545   /* condense columns of maij->B */
3546   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3547   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3550   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3551   PetscFunctionReturn(0);
3552 }
3553 
3554 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3555 
3556 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3557 {
3558   PetscErrorCode ierr;
3559   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3560   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3561   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3562   Mat            M,Msub,B=a->B;
3563   MatScalar      *aa;
3564   Mat_SeqAIJ     *aij;
3565   PetscInt       *garray = a->garray,*colsub,Ncols;
3566   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3567   IS             iscol_sub,iscmap;
3568   const PetscInt *is_idx,*cmap;
3569   PetscBool      allcolumns=PETSC_FALSE;
3570   MPI_Comm       comm;
3571 
3572   PetscFunctionBegin;
3573   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3574 
3575   if (call == MAT_REUSE_MATRIX) {
3576     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3577     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3578     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3579 
3580     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3581     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3582 
3583     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3584     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3585 
3586     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3587 
3588   } else { /* call == MAT_INITIAL_MATRIX */
3589     PetscBool flg;
3590 
3591     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3592     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3593 
3594     /* (1) iscol -> nonscalable iscol_local */
3595     /* Check for special case: each processor gets entire matrix columns */
3596     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3597     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3598     if (allcolumns) {
3599       iscol_sub = iscol_local;
3600       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3601       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3602 
3603     } else {
3604       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local be sorted; it can have duplicate indices */
3605       PetscInt *idx,*cmap1,k;
3606       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3607       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3608       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3609       count = 0;
3610       k     = 0;
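      /* two-pointer sweep: both the sorted is_idx and the sorted garray are traversed
         once, so k only ever advances */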
3611       for (i=0; i<Ncols; i++) {
3612         j = is_idx[i];
3613         if (j >= cstart && j < cend) {
3614           /* diagonal part of mat */
3615           idx[count]     = j;
3616           cmap1[count++] = i; /* column index in submat */
3617         } else if (Bn) {
3618           /* off-diagonal part of mat */
3619           if (j == garray[k]) {
3620             idx[count]     = j;
3621             cmap1[count++] = i;  /* column index in submat */
3622           } else if (j > garray[k]) {
3623             while (j > garray[k] && k < Bn-1) k++;
3624             if (j == garray[k]) {
3625               idx[count]     = j;
3626               cmap1[count++] = i; /* column index in submat */
3627             }
3628           }
3629         }
3630       }
3631       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3632 
3633       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3634       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3635       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3636 
3637       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3638     }
3639 
3640     /* (3) Create sequential Msub */
3641     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3642   }
3643 
3644   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3645   aij  = (Mat_SeqAIJ*)(Msub)->data;
3646   ii   = aij->i;
3647   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3648 
3649   /*
3650       m - number of local rows
3651       Ncols - number of columns (same on all processors)
3652       rstart - first row in new global matrix generated
3653   */
3654   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3655 
3656   if (call == MAT_INITIAL_MATRIX) {
3657     /* (4) Create parallel newmat */
3658     PetscMPIInt    rank,size;
3659     PetscInt       csize;
3660 
3661     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3662     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3663 
3664     /*
3665         Determine the number of non-zeros in the diagonal and off-diagonal
3666         portions of the matrix in order to do correct preallocation
3667     */
3668 
3669     /* first get start and end of "diagonal" columns */
3670     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3671     if (csize == PETSC_DECIDE) {
3672       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3673       if (mglobal == Ncols) { /* square matrix */
3674         nlocal = m;
3675       } else {
3676         nlocal = Ncols/size + ((Ncols % size) > rank);
3677       }
3678     } else {
3679       nlocal = csize;
3680     }
3681     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3682     rstart = rend - nlocal;
3683     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3684 
3685     /* next, compute all the lengths */
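    /* dlens and olens share one allocation of length 2*m+1; olens aliases its second half */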
3686     jj    = aij->j;
3687     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3688     olens = dlens + m;
3689     for (i=0; i<m; i++) {
3690       jend = ii[i+1] - ii[i];
3691       olen = 0;
3692       dlen = 0;
3693       for (j=0; j<jend; j++) {
3694         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3695         else dlen++;
3696         jj++;
3697       }
3698       olens[i] = olen;
3699       dlens[i] = dlen;
3700     }
3701 
3702     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3703     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3704 
3705     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3706     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3707     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3708     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3709     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3710     ierr = PetscFree(dlens);CHKERRQ(ierr);
3711 
3712   } else { /* call == MAT_REUSE_MATRIX */
3713     M    = *newmat;
3714     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3715     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3716     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3717     /*
3718          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3719        rather than the slower MatSetValues().
3720     */
3721     M->was_assembled = PETSC_TRUE;
3722     M->assembled     = PETSC_FALSE;
3723   }
3724 
3725   /* (5) Set values of Msub to *newmat */
3726   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3727   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3728 
3729   jj   = aij->j;
3730   aa   = aij->a;
3731   for (i=0; i<m; i++) {
3732     row = rstart + i;
3733     nz  = ii[i+1] - ii[i];
3734     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3735     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3736     jj += nz; aa += nz;
3737   }
3738   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3739 
3740   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742 
3743   ierr = PetscFree(colsub);CHKERRQ(ierr);
3744 
3745   /* save Msub, iscol_sub and iscmap used in processor for next request */
3746   if (call ==  MAT_INITIAL_MATRIX) {
3747     *newmat = M;
3748     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3749     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3750 
3751     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3752     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3753 
3754     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3755     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3756 
3757     if (iscol_local) {
3758       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3759       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3760     }
3761   }
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 /*
3766     Not great since it makes two copies of the submatrix: first a SeqAIJ
3767   on each process, then the end result built by concatenating the local matrices.
3768   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3769 
3770   Note: This requires a sequential iscol with all indices.
3771 */
3772 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3773 {
3774   PetscErrorCode ierr;
3775   PetscMPIInt    rank,size;
3776   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3777   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3778   Mat            M,Mreuse;
3779   MatScalar      *aa,*vwork;
3780   MPI_Comm       comm;
3781   Mat_SeqAIJ     *aij;
3782   PetscBool      colflag,allcolumns=PETSC_FALSE;
3783 
3784   PetscFunctionBegin;
3785   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3788 
3789   /* Check for special case: each processor gets entire matrix columns */
3790   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3791   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3792   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3793 
3794   if (call ==  MAT_REUSE_MATRIX) {
3795     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3796     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3797     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3798   } else {
3799     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3800   }
3801 
3802   /*
3803       m - number of local rows
3804       n - number of columns (same on all processors)
3805       rstart - first row in new global matrix generated
3806   */
3807   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3808   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3809   if (call == MAT_INITIAL_MATRIX) {
3810     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3811     ii  = aij->i;
3812     jj  = aij->j;
3813 
3814     /*
3815         Determine the number of non-zeros in the diagonal and off-diagonal
3816         portions of the matrix in order to do correct preallocation
3817     */
3818 
3819     /* first get start and end of "diagonal" columns */
3820     if (csize == PETSC_DECIDE) {
3821       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3822       if (mglobal == n) { /* square matrix */
3823         nlocal = m;
3824       } else {
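        /* split the columns as evenly as possible; e.g. (hypothetical) n=10, size=4 gives local sizes 3,3,2,2 */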
3825         nlocal = n/size + ((n % size) > rank);
3826       }
3827     } else {
3828       nlocal = csize;
3829     }
3830     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3831     rstart = rend - nlocal;
3832     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3833 
3834     /* next, compute all the lengths */
3835     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3836     olens = dlens + m;
3837     for (i=0; i<m; i++) {
3838       jend = ii[i+1] - ii[i];
3839       olen = 0;
3840       dlen = 0;
3841       for (j=0; j<jend; j++) {
3842         if (*jj < rstart || *jj >= rend) olen++;
3843         else dlen++;
3844         jj++;
3845       }
3846       olens[i] = olen;
3847       dlens[i] = dlen;
3848     }
3849     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3850     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3851     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3852     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3853     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3854     ierr = PetscFree(dlens);CHKERRQ(ierr);
3855   } else {
3856     PetscInt ml,nl;
3857 
3858     M    = *newmat;
3859     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3860     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3861     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3862     /*
3863          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3864        rather than the slower MatSetValues().
3865     */
3866     M->was_assembled = PETSC_TRUE;
3867     M->assembled     = PETSC_FALSE;
3868   }
3869   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3870   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3871   ii   = aij->i;
3872   jj   = aij->j;
3873   aa   = aij->a;
3874   for (i=0; i<m; i++) {
3875     row   = rstart + i;
3876     nz    = ii[i+1] - ii[i];
3877     cwork = jj;     jj += nz;
3878     vwork = aa;     aa += nz;
3879     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3880   }
3881 
3882   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   *newmat = M;
3885 
3886   /* save submatrix for the next request on this process */
3887   if (call ==  MAT_INITIAL_MATRIX) {
3888     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3889     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3890   }
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3895 {
3896   PetscInt       m,cstart, cend,j,nnz,i,d;
3897   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3898   const PetscInt *JJ;
3899   PetscScalar    *values;
3900   PetscErrorCode ierr;
3901   PetscBool      nooffprocentries;
3902 
3903   PetscFunctionBegin;
3904   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3905 
3906   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3907   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3908   m      = B->rmap->n;
3909   cstart = B->cmap->rstart;
3910   cend   = B->cmap->rend;
3911   rstart = B->rmap->rstart;
3912 
3913   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3914 
3915 #if defined(PETSC_USE_DEBUG)
3916   for (i=0; i<m && Ii; i++) {
3917     nnz = Ii[i+1]- Ii[i];
3918     JJ  = J + Ii[i];
3919     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3920     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3921     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3922   }
3923 #endif
3924 
3925   for (i=0; i<m && Ii; i++) {
3926     nnz     = Ii[i+1]- Ii[i];
3927     JJ      = J + Ii[i];
3928     nnz_max = PetscMax(nnz_max,nnz);
3929     d       = 0;
3930     for (j=0; j<nnz; j++) {
3931       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3932     }
3933     d_nnz[i] = d;
3934     o_nnz[i] = nnz - d;
3935   }
3936   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3937   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3938 
3939   if (v) values = (PetscScalar*)v;
3940   else {
3941     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3942   }
3943 
3944   for (i=0; i<m && Ii; i++) {
3945     ii   = i + rstart;
3946     nnz  = Ii[i+1]- Ii[i];
3947     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3948   }
3949   nooffprocentries    = B->nooffprocentries;
3950   B->nooffprocentries = PETSC_TRUE;
3951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   B->nooffprocentries = nooffprocentries;
3954 
3955   if (!v) {
3956     ierr = PetscFree(values);CHKERRQ(ierr);
3957   }
3958   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961 
3962 /*@
3963    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3964    (the default parallel PETSc format).
3965 
3966    Collective on MPI_Comm
3967 
3968    Input Parameters:
3969 +  B - the matrix
3970 .  i - the indices into j for the start of each local row (starts with zero)
3971 .  j - the column indices for each local row (starts with zero)
3972 -  v - optional values in the matrix
3973 
3974    Level: developer
3975 
3976    Notes:
3977        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3978      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3979      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3980 
3981        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3982 
3983        The format used for the sparse matrix input is equivalent to a
3984     row-major ordering, i.e. for the following matrix, the expected input
3985     data is as shown:
3986 
3987 $        1 0 0
3988 $        2 0 3     P0
3989 $       -------
3990 $        4 5 6     P1
3991 $
3992 $     Process0 [P0]: rows_owned=[0,1]
3993 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3994 $        j =  {0,0,2}  [size = 3]
3995 $        v =  {1,2,3}  [size = 3]
3996 $
3997 $     Process1 [P1]: rows_owned=[2]
3998 $        i =  {0,3}    [size = nrow+1  = 1+1]
3999 $        j =  {0,1,2}  [size = 3]
4000 $        v =  {4,5,6}  [size = 3]
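
   As a minimal sketch (assuming B has already been created with type MATMPIAIJ
   and given its local sizes with MatSetSizes()), the call on P0 for the example
   above would be:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve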
4001 
4002 .keywords: matrix, aij, compressed row, sparse, parallel
4003 
4004 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4005           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4006 @*/
4007 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4008 {
4009   PetscErrorCode ierr;
4010 
4011   PetscFunctionBegin;
4012   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 /*@C
4017    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4018    (the default parallel PETSc format).  For good matrix assembly performance
4019    the user should preallocate the matrix storage by setting the parameters
4020    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4021    performance can be increased by more than a factor of 50.
4022 
4023    Collective on MPI_Comm
4024 
4025    Input Parameters:
4026 +  B - the matrix
4027 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4028            (same value is used for all local rows)
4029 .  d_nnz - array containing the number of nonzeros in the various rows of the
4030            DIAGONAL portion of the local submatrix (possibly different for each row)
4031            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4032            The size of this array is equal to the number of local rows, i.e 'm'.
4033            For matrices that will be factored, you must leave room for (and set)
4034            the diagonal entry even if it is zero.
4035 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4036            submatrix (same value is used for all local rows).
4037 -  o_nnz - array containing the number of nonzeros in the various rows of the
4038            OFF-DIAGONAL portion of the local submatrix (possibly different for
4039            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4040            structure. The size of this array is equal to the number
4041            of local rows, i.e 'm'.
4042 
4043    If the *_nnz parameter is given then the *_nz parameter is ignored
4044 
4045    The AIJ format (also called the Yale sparse matrix format or
4046    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4047    storage.  The stored row and column indices begin with zero.
4048    See Users-Manual: ch_mat for details.
4049 
4050    The parallel matrix is partitioned such that the first m0 rows belong to
4051    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4052    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4053 
4054    The DIAGONAL portion of the local submatrix of a processor can be defined
4055    as the submatrix which is obtained by extracting the part corresponding to
4056    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4057    first row that belongs to the processor, r2 is the last row belonging to
4058    this processor, and c1-c2 is the range of indices of the local part of a
4059    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4060    common case of a square matrix, the row and column ranges are the same and
4061    the DIAGONAL part is also square. The remaining portion of the local
4062    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4063 
4064    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4065 
4066    You can call MatGetInfo() to get information on how effective the preallocation was;
4067    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4068    You can also run with the option -info and look for messages with the string
4069    malloc in them to see if additional memory allocation was needed.
4070 
4071    Example usage:
4072 
4073    Consider the following 8x8 matrix with 34 non-zero values, that is
4074    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4075    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4076    as follows:
4077 
4078 .vb
4079             1  2  0  |  0  3  0  |  0  4
4080     Proc0   0  5  6  |  7  0  0  |  8  0
4081             9  0 10  | 11  0  0  | 12  0
4082     -------------------------------------
4083            13  0 14  | 15 16 17  |  0  0
4084     Proc1   0 18  0  | 19 20 21  |  0  0
4085             0  0  0  | 22 23  0  | 24  0
4086     -------------------------------------
4087     Proc2  25 26 27  |  0  0 28  | 29  0
4088            30  0  0  | 31 32 33  |  0 34
4089 .ve
4090 
4091    This can be represented as a collection of submatrices as:
4092 
4093 .vb
4094       A B C
4095       D E F
4096       G H I
4097 .ve
4098 
4099    Where the submatrices A,B,C are owned by proc0, D,E,F are
4100    owned by proc1, G,H,I are owned by proc2.
4101 
4102    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'M','N' parameters are 8,8, and have the same values on all procs.
4105 
4106    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4107    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4108    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4109    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4110    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4111    matrix, and [DF] as another SeqAIJ matrix.
4112 
4113    When d_nz, o_nz parameters are specified, d_nz storage elements are
4114    allocated for every row of the local diagonal submatrix, and o_nz
4115    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4116    One way to choose d_nz and o_nz is to use the max nonzeros per local
4117    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4118    In this case, the values of d_nz,o_nz are:
4119 .vb
4120      proc0 : dnz = 2, o_nz = 2
4121      proc1 : dnz = 3, o_nz = 2
4122      proc2 : dnz = 1, o_nz = 4
4123 .ve
4124    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4125    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4126    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4127    34 values.
4128 
4129    When d_nnz, o_nnz parameters are specified, the storage is specified
4130    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4131    In the above case the values for d_nnz,o_nnz are:
4132 .vb
4133      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4134      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4135      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4136 .ve
4137    Here the space allocated is the sum of all the above values, i.e. 34, and
4138    hence pre-allocation is perfect.
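
   As a sketch, the corresponding call on proc1 (assuming B has been created with
   type MATMPIAIJ and given its local sizes) would be:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve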
4139 
4140    Level: intermediate
4141 
4142 .keywords: matrix, aij, compressed row, sparse, parallel
4143 
4144 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4145           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4146 @*/
4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4148 {
4149   PetscErrorCode ierr;
4150 
4151   PetscFunctionBegin;
4152   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4153   PetscValidType(B,1);
4154   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 /*@
4159      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
4160          the local rows in standard CSR format.
4161 
4162    Collective on MPI_Comm
4163 
4164    Input Parameters:
4165 +  comm - MPI communicator
4166 .  m - number of local rows (Cannot be PETSC_DECIDE)
4167 .  n - This value should be the same as the local size used in creating the
4168        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4169        it calculated if N is given). For square matrices n is almost always m.
4170 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4171 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4172 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4173 .   j - column indices
4174 -   a - matrix values
4175 
4176    Output Parameter:
4177 .   mat - the matrix
4178 
4179    Level: intermediate
4180 
4181    Notes:
4182        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4183      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4184      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4185 
4186        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4187 
4188        The format used for the sparse matrix input is equivalent to a
4189     row-major ordering, i.e. for the following matrix, the expected input
4190     data is as shown:
4191 
4192 $        1 0 0
4193 $        2 0 3     P0
4194 $       -------
4195 $        4 5 6     P1
4196 $
4197 $     Process0 [P0]: rows_owned=[0,1]
4198 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4199 $        j =  {0,0,2}  [size = 3]
4200 $        v =  {1,2,3}  [size = 3]
4201 $
4202 $     Process1 [P1]: rows_owned=[2]
4203 $        i =  {0,3}    [size = nrow+1  = 1+1]
4204 $        j =  {0,1,2}  [size = 3]
4205 $        v =  {4,5,6}  [size = 3]
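
   For illustration, a minimal sketch of the corresponding call on P0 (the names
   i, j, v match the arrays above):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve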
4206 
4207 .keywords: matrix, aij, compressed row, sparse, parallel
4208 
4209 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4210           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4211 @*/
4212 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4213 {
4214   PetscErrorCode ierr;
4215 
4216   PetscFunctionBegin;
4217   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4218   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4219   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4220   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4221   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4222   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4223   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 /*@C
4228    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4229    (the default parallel PETSc format).  For good matrix assembly performance
4230    the user should preallocate the matrix storage by setting the parameters
4231    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4232    performance can be increased by more than a factor of 50.
4233 
4234    Collective on MPI_Comm
4235 
4236    Input Parameters:
4237 +  comm - MPI communicator
4238 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4239            This value should be the same as the local size used in creating the
4240            y vector for the matrix-vector product y = Ax.
4241 .  n - This value should be the same as the local size used in creating the
4242        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4243        it calculated if N is given). For square matrices n is almost always m.
4244 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4245 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4246 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4247            (same value is used for all local rows)
4248 .  d_nnz - array containing the number of nonzeros in the various rows of the
4249            DIAGONAL portion of the local submatrix (possibly different for each row)
4250            or NULL, if d_nz is used to specify the nonzero structure.
4251            The size of this array is equal to the number of local rows, i.e 'm'.
4252 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4253            submatrix (same value is used for all local rows).
4254 -  o_nnz - array containing the number of nonzeros in the various rows of the
4255            OFF-DIAGONAL portion of the local submatrix (possibly different for
4256            each row) or NULL, if o_nz is used to specify the nonzero
4257            structure. The size of this array is equal to the number
4258            of local rows, i.e 'm'.
4259 
4260    Output Parameter:
4261 .  A - the matrix
4262 
4263    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4264    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4265    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4266 
4267    Notes:
4268    If the *_nnz parameter is given then the *_nz parameter is ignored
4269 
4270    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4271    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4272    storage requirements for this matrix.
4273 
4274    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4275    processor then it must be used on all processors that share the object for
4276    that argument.
4277 
4278    The user MUST specify either the local or global matrix dimensions
4279    (possibly both).
4280 
4281    The parallel matrix is partitioned across processors such that the
4282    first m0 rows belong to process 0, the next m1 rows belong to
4283    process 1, the next m2 rows belong to process 2, etc., where
4284    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4285    values corresponding to an [m x N] submatrix.
4286 
4287    The columns are logically partitioned with the n0 columns belonging
4288    to 0th partition, the next n1 columns belonging to the next
4289    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4290 
4291    The DIAGONAL portion of the local submatrix on any given processor
4292    is the submatrix formed by the rows and columns m,n owned by
4293    the given processor, i.e. the diagonal matrix on
4294    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4295    etc. The remaining portion of the local submatrix [m x (N-n)]
4296    constitute the OFF-DIAGONAL portion. The example below better
4297    illustrates this concept.
4298 
4299    For a square global matrix we define each processor's diagonal portion
4300    to be its local rows and the corresponding columns (a square submatrix);
4301    each processor's off-diagonal portion encompasses the remainder of the
4302    local matrix (a rectangular submatrix).
4303 
4304    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4305 
4306    When calling this routine with a single process communicator, a matrix of
4307    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4308    type of communicator, use the construction mechanism
4309 .vb
4310      MatCreate(...,&A);
4311      MatSetType(A,MATMPIAIJ);
4312      MatSetSizes(A, m,n,M,N);
4313      MatMPIAIJSetPreallocation(A,...);
4314 .ve
4317 
4318    By default, this format uses inodes (identical nodes) when possible.
4319    We search for consecutive rows with the same nonzero structure, thereby
4320    reusing matrix information to achieve increased efficiency.
4321 
4322    Options Database Keys:
4323 +  -mat_no_inode  - Do not use inodes
4324 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4325 
4328    Example usage:
4329 
4330    Consider the following 8x8 matrix with 34 non-zero values, that is
4331    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4332    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4333    as follows
4334 
4335 .vb
4336             1  2  0  |  0  3  0  |  0  4
4337     Proc0   0  5  6  |  7  0  0  |  8  0
4338             9  0 10  | 11  0  0  | 12  0
4339     -------------------------------------
4340            13  0 14  | 15 16 17  |  0  0
4341     Proc1   0 18  0  | 19 20 21  |  0  0
4342             0  0  0  | 22 23  0  | 24  0
4343     -------------------------------------
4344     Proc2  25 26 27  |  0  0 28  | 29  0
4345            30  0  0  | 31 32 33  |  0 34
4346 .ve
4347 
4348    This can be represented as a collection of submatrices as
4349 
4350 .vb
4351       A B C
4352       D E F
4353       G H I
4354 .ve
4355 
4356    Where the submatrices A,B,C are owned by proc0, D,E,F are
4357    owned by proc1, G,H,I are owned by proc2.
4358 
4359    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4360    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4361    The 'M','N' parameters are 8,8, and have the same values on all procs.
4362 
4363    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4364    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4365    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4366    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4367    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4368    matrix, and [DF] as another SeqAIJ matrix.
4369 
4370    When d_nz, o_nz parameters are specified, d_nz storage elements are
4371    allocated for every row of the local diagonal submatrix, and o_nz
4372    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4373    One way to choose d_nz and o_nz is to use the max nonzeros per local
4374    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4375    In this case, the values of d_nz,o_nz are
4376 .vb
4377      proc0 : dnz = 2, o_nz = 2
4378      proc1 : dnz = 3, o_nz = 2
4379      proc2 : dnz = 1, o_nz = 4
4380 .ve
4381    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4382    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4383    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4384    34 values.
4385 
4386    When d_nnz, o_nnz parameters are specified, the storage is specified
4387    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4388    In the above case the values for d_nnz,o_nnz are
4389 .vb
4390      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4391      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4392      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4393 .ve
4394    Here the space allocated is the sum of all the above values, i.e. 34, and
4395    hence pre-allocation is perfect.
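
   As a sketch, the corresponding call on proc0 for the example above, using the
   per-row counts, would be:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve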
4396 
4397    Level: intermediate
4398 
4399 .keywords: matrix, aij, compressed row, sparse, parallel
4400 
4401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4402           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4403 @*/
4404 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4405 {
4406   PetscErrorCode ierr;
4407   PetscMPIInt    size;
4408 
4409   PetscFunctionBegin;
4410   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4411   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4413   if (size > 1) {
4414     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4415     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4416   } else {
4417     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4418     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4419   }
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4424 {
4425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4426   PetscBool      flg;
4427   PetscErrorCode ierr;
4428 
4429   PetscFunctionBegin;
4430   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4431   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4432   if (Ad)     *Ad     = a->A;
4433   if (Ao)     *Ao     = a->B;
4434   if (colmap) *colmap = a->garray;
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4439 {
4440   PetscErrorCode ierr;
4441   PetscInt       m,N,i,rstart,nnz,Ii;
4442   PetscInt       *indx;
4443   PetscScalar    *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4447   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4448     PetscInt       *dnz,*onz,sum,bs,cbs;
4449 
4450     if (n == PETSC_DECIDE) {
4451       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4452     }
4453     /* Check sum(n) = N */
4454     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4455     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4456 
4457     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4458     rstart -= m;
4459 
4460     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4461     for (i=0; i<m; i++) {
4462       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4463       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4464       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4465     }
4466 
4467     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4468     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4469     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4470     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4471     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4472     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4473     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4474     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4475   }
4476 
4477   /* numeric phase */
4478   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4479   for (i=0; i<m; i++) {
4480     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4481     Ii   = i + rstart;
4482     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4483     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4484   }
4485   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4486   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
4489 
4490 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4491 {
4492   PetscErrorCode    ierr;
4493   PetscMPIInt       rank;
4494   PetscInt          m,N,i,rstart,nnz;
4495   size_t            len;
4496   const PetscInt    *indx;
4497   PetscViewer       out;
4498   char              *name;
4499   Mat               B;
4500   const PetscScalar *values;
4501 
4502   PetscFunctionBegin;
4503   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4504   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4505   /* Should this be the type of the diagonal block of A? */
4506   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4507   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4508   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4509   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4510   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4511   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4512   for (i=0; i<m; i++) {
4513     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4514     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4515     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4516   }
4517   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4519 
4520   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4521   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4522   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr); /* leave room for "." plus the rank digits, even for large ranks */
4523   sprintf(name,"%s.%d",outfile,rank);
4524   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4525   ierr = PetscFree(name);CHKERRQ(ierr);
4526   ierr = MatView(B,out);CHKERRQ(ierr);
4527   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4528   ierr = MatDestroy(&B);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4533 {
4534   PetscErrorCode      ierr;
4535   Mat_Merge_SeqsToMPI *merge;
4536   PetscContainer      container;
4537 
4538   PetscFunctionBegin;
4539   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4540   if (container) {
4541     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4542     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4543     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4544     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4545     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4546     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4547     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4554     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4555     ierr = PetscFree(merge);CHKERRQ(ierr);
4556     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4557   }
4558   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 #include <../src/mat/utils/freespace.h>
4563 #include <petscbt.h>
4564 
4565 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4566 {
4567   PetscErrorCode      ierr;
4568   MPI_Comm            comm;
4569   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4570   PetscMPIInt         size,rank,taga,*len_s;
4571   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4572   PetscInt            proc,m;
4573   PetscInt            **buf_ri,**buf_rj;
4574   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4575   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4576   MPI_Request         *s_waits,*r_waits;
4577   MPI_Status          *status;
4578   MatScalar           *aa=a->a;
4579   MatScalar           **abuf_r,*ba_i;
4580   Mat_Merge_SeqsToMPI *merge;
4581   PetscContainer      container;
4582 
4583   PetscFunctionBegin;
4584   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4585   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4586 
4587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4589 
4590   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4591   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4592 
4593   bi     = merge->bi;
4594   bj     = merge->bj;
4595   buf_ri = merge->buf_ri;
4596   buf_rj = merge->buf_rj;
4597 
4598   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4599   owners = merge->rowmap->range;
4600   len_s  = merge->len_s;
4601 
4602   /* send and recv matrix values */
4603   /*-----------------------------*/
4604   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4605   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4606 
4607   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4608   for (proc=0,k=0; proc<size; proc++) {
4609     if (!len_s[proc]) continue;
4610     i    = owners[proc];
4611     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4612     k++;
4613   }
4614 
4615   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4616   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4617   ierr = PetscFree(status);CHKERRQ(ierr);
4618 
4619   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4620   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4621 
4622   /* insert mat values of mpimat */
4623   /*----------------------------*/
4624   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4625   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4626 
4627   for (k=0; k<merge->nrecv; k++) {
4628     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4629     nrows       = *(buf_ri_k[k]);
4630     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4631     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4632   }
4633 
4634   /* set values of ba */
4635   m = merge->rowmap->n;
4636   for (i=0; i<m; i++) {
4637     arow = owners[rank] + i;
4638     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4639     bnzi = bi[i+1] - bi[i];
4640     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4641 
4642     /* add local non-zero vals of this proc's seqmat into ba */
4643     anzi   = ai[arow+1] - ai[arow];
4644     aj     = a->j + ai[arow];
4645     aa     = a->a + ai[arow];
4646     nextaj = 0;
4647     for (j=0; nextaj<anzi; j++) {
4648       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4649         ba_i[j] += aa[nextaj++];
4650       }
4651     }
4652 
4653     /* add received vals into ba */
4654     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4655       /* i-th row */
4656       if (i == *nextrow[k]) {
4657         anzi   = *(nextai[k]+1) - *nextai[k];
4658         aj     = buf_rj[k] + *(nextai[k]);
4659         aa     = abuf_r[k] + *(nextai[k]);
4660         nextaj = 0;
4661         for (j=0; nextaj<anzi; j++) {
4662           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4663             ba_i[j] += aa[nextaj++];
4664           }
4665         }
4666         nextrow[k]++; nextai[k]++;
4667       }
4668     }
4669     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4670   }
4671   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4672   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4673 
4674   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4675   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4676   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4677   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4678   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4679   PetscFunctionReturn(0);
4680 }
4681 
4682 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4683 {
4684   PetscErrorCode      ierr;
4685   Mat                 B_mpi;
4686   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4687   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4688   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4689   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4690   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4691   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4692   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4693   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4694   MPI_Status          *status;
4695   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4696   PetscBT             lnkbt;
4697   Mat_Merge_SeqsToMPI *merge;
4698   PetscContainer      container;
4699 
4700   PetscFunctionBegin;
4701   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4702 
4703   /* make sure it is a PETSc comm */
4704   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4705   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4706   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4707 
4708   ierr = PetscNew(&merge);CHKERRQ(ierr);
4709   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4710 
4711   /* determine row ownership */
4712   /*---------------------------------------------------------*/
4713   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4714   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4715   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4716   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4717   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4718   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4719   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4720 
4721   m      = merge->rowmap->n;
4722   owners = merge->rowmap->range;
4723 
4724   /* determine the number of messages to send, their lengths */
4725   /*---------------------------------------------------------*/
4726   len_s = merge->len_s;
4727 
4728   len          = 0; /* length of buf_si[] */
4729   merge->nsend = 0;
4730   for (proc=0; proc<size; proc++) {
4731     len_si[proc] = 0;
4732     if (proc == rank) {
4733       len_s[proc] = 0;
4734     } else {
4735       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4736       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4737     }
4738     if (len_s[proc]) {
4739       merge->nsend++;
4740       nrows = 0;
4741       for (i=owners[proc]; i<owners[proc+1]; i++) {
4742         if (ai[i+1] > ai[i]) nrows++;
4743       }
4744       len_si[proc] = 2*(nrows+1);
4745       len         += len_si[proc];
4746     }
4747   }
4748 
4749   /* determine the number and length of messages to receive for ij-structure */
4750   /*-------------------------------------------------------------------------*/
4751   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4752   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4753 
4754   /* post the Irecv of j-structure */
4755   /*-------------------------------*/
4756   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4757   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4758 
4759   /* post the Isend of j-structure */
4760   /*--------------------------------*/
4761   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4762 
4763   for (proc=0, k=0; proc<size; proc++) {
4764     if (!len_s[proc]) continue;
4765     i    = owners[proc];
4766     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4767     k++;
4768   }
4769 
4770   /* receives and sends of j-structure are complete */
4771   /*------------------------------------------------*/
4772   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4773   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4774 
4775   /* send and recv i-structure */
4776   /*---------------------------*/
4777   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4778   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4779 
4780   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4781   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4782   for (proc=0,k=0; proc<size; proc++) {
4783     if (!len_s[proc]) continue;
4784     /* form outgoing message for i-structure:
4785          buf_si[0]:                 nrows to be sent
4786                [1:nrows]:           row index (local to the destination process)
4787                [nrows+1:2*nrows+1]: i-structure index
4788     */
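    /*
       For illustration (hypothetical numbers): if the rows with local indices 3 and 7,
       holding 2 and 5 nonzeros respectively, are sent to [proc], then nrows = 2 and
       buf_si = {2, 3, 7, 0, 2, 7}, i.e. the row count, the two local row indices,
       and the i-structure offsets {0,2,7}; len_si[proc] = 2*(nrows+1) = 6.
    */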
4789     /*-------------------------------------------*/
4790     nrows       = len_si[proc]/2 - 1;
4791     buf_si_i    = buf_si + nrows+1;
4792     buf_si[0]   = nrows;
4793     buf_si_i[0] = 0;
4794     nrows       = 0;
4795     for (i=owners[proc]; i<owners[proc+1]; i++) {
4796       anzi = ai[i+1] - ai[i];
4797       if (anzi) {
4798         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4799         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4800         nrows++;
4801       }
4802     }
4803     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4804     k++;
4805     buf_si += len_si[proc];
4806   }
4807 
4808   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4809   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4810 
4811   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4812   for (i=0; i<merge->nrecv; i++) {
4813     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4814   }
4815 
4816   ierr = PetscFree(len_si);CHKERRQ(ierr);
4817   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4818   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4819   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4820   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4821   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4822   ierr = PetscFree(status);CHKERRQ(ierr);
4823 
4824   /* compute a local seq matrix in each processor */
4825   /*----------------------------------------------*/
4826   /* allocate bi array and free space for accumulating nonzero column info */
4827   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4828   bi[0] = 0;
4829 
4830   /* create and initialize a linked list */
4831   nlnk = N+1;
4832   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4833 
4834   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4835   len  = ai[owners[rank+1]] - ai[owners[rank]];
4836   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4837 
4838   current_space = free_space;
4839 
4840   /* determine symbolic info for each local row */
4841   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4842 
4843   for (k=0; k<merge->nrecv; k++) {
4844     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4845     nrows       = *buf_ri_k[k];
4846     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4847     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4848   }
4849 
4850   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4851   len  = 0;
4852   for (i=0; i<m; i++) {
4853     bnzi = 0;
4854     /* add local non-zero cols of this proc's seqmat into lnk */
4855     arow  = owners[rank] + i;
4856     anzi  = ai[arow+1] - ai[arow];
4857     aj    = a->j + ai[arow];
4858     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4859     bnzi += nlnk;
4860     /* add received col data into lnk */
4861     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4862       if (i == *nextrow[k]) { /* i-th row */
4863         anzi  = *(nextai[k]+1) - *nextai[k];
4864         aj    = buf_rj[k] + *nextai[k];
4865         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4866         bnzi += nlnk;
4867         nextrow[k]++; nextai[k]++;
4868       }
4869     }
4870     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4871 
4872     /* if free space is not available, make more free space */
4873     if (current_space->local_remaining<bnzi) {
4874       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4875       nspacedouble++;
4876     }
4877     /* copy data into free space, then initialize lnk */
4878     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4879     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4880 
4881     current_space->array           += bnzi;
4882     current_space->local_used      += bnzi;
4883     current_space->local_remaining -= bnzi;
4884 
4885     bi[i+1] = bi[i] + bnzi;
4886   }
4887 
4888   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4889 
4890   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4891   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4892   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4893 
4894   /* create symbolic parallel matrix B_mpi */
4895   /*---------------------------------------*/
4896   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4897   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4898   if (n==PETSC_DECIDE) {
4899     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4900   } else {
4901     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4902   }
4903   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4904   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4905   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4906   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4907   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4908 
4909   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4910   B_mpi->assembled    = PETSC_FALSE;
4911   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4912   merge->bi           = bi;
4913   merge->bj           = bj;
4914   merge->buf_ri       = buf_ri;
4915   merge->buf_rj       = buf_rj;
4916   merge->coi          = NULL;
4917   merge->coj          = NULL;
4918   merge->owners_co    = NULL;
4919 
4920   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4921 
4922   /* attach the supporting struct to B_mpi for reuse */
4923   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4924   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4925   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4926   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4927   *mpimat = B_mpi;
4928 
4929   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 /*@C
4934       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4935                  matrices from each processor
4936 
4937     Collective on MPI_Comm
4938 
4939    Input Parameters:
4940 +    comm - the communicator the parallel matrix will live on
4941 .    seqmat - the input sequential matrix (one per process)
4942 .    m - number of local rows (or PETSC_DECIDE)
4943 .    n - number of local columns (or PETSC_DECIDE)
4944 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4945 
4946    Output Parameter:
4947 .    mpimat - the parallel matrix generated
4948 
4949     Level: advanced
4950 
4951    Notes:
4952      The dimensions of the sequential matrix in each processor MUST be the same.
4953      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4954      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
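
     For example, the supporting data attached to mpimat can be retrieved with
     (a sketch; "MatMergeSeqsToMPI" is the key this routine composes internally):
.vb
     PetscContainer container;
     PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
.ve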
4955 @*/
4956 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4957 {
4958   PetscErrorCode ierr;
4959   PetscMPIInt    size;
4960 
4961   PetscFunctionBegin;
4962   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4963   if (size == 1) {
4964     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965     if (scall == MAT_INITIAL_MATRIX) {
4966       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4967     } else {
4968       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4969     }
4970     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4971     PetscFunctionReturn(0);
4972   }
4973   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4974   if (scall == MAT_INITIAL_MATRIX) {
4975     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4976   }
4977   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4978   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /*@
4983      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4984           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4985           with MatGetSize().
4986 
4987     Not Collective
4988 
4989    Input Parameters:
4990 +    A - the matrix
4991 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4992 
4993    Output Parameter:
4994 .    A_loc - the local sequential matrix generated
4995 
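   Notes:
     A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; the
     caller destroys A_loc when done):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... use A_loc as a sequential matrix ...
     MatDestroy(&A_loc);
.ve
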
4996     Level: developer
4997 
4998 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4999 
5000 @*/
5001 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5002 {
5003   PetscErrorCode ierr;
5004   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5005   Mat_SeqAIJ     *mat,*a,*b;
5006   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5007   MatScalar      *aa,*ba,*cam;
5008   PetscScalar    *ca;
5009   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5010   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5011   PetscBool      match;
5012   MPI_Comm       comm;
5013   PetscMPIInt    size;
5014 
5015   PetscFunctionBegin;
5016   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5017   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5018   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5020   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5021 
5022   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5023   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5024   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5025   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5026   aa = a->a; ba = b->a;
5027   if (scall == MAT_INITIAL_MATRIX) {
5028     if (size == 1) {
5029       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5030       PetscFunctionReturn(0);
5031     }
5032 
5033     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5034     ci[0] = 0;
5035     for (i=0; i<am; i++) {
5036       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5037     }
5038     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5039     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5040     k    = 0;
5041     for (i=0; i<am; i++) {
5042       ncols_o = bi[i+1] - bi[i];
5043       ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global columns < cstart */
5045       for (jo=0; jo<ncols_o; jo++) {
5046         col = cmap[*bj];
5047         if (col >= cstart) break;
5048         cj[k]   = col; bj++;
5049         ca[k++] = *ba++;
5050       }
5051       /* diagonal portion of A */
5052       for (j=0; j<ncols_d; j++) {
5053         cj[k]   = cstart + *aj++;
5054         ca[k++] = *aa++;
5055       }
      /* off-diagonal portion of A with global columns >= cend */
5057       for (j=jo; j<ncols_o; j++) {
5058         cj[k]   = cmap[*bj++];
5059         ca[k++] = *ba++;
5060       }
5061     }
5062     /* put together the new matrix */
5063     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5064     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5065     /* Since these are PETSc arrays, change flags to free them as necessary. */
5066     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5067     mat->free_a  = PETSC_TRUE;
5068     mat->free_ij = PETSC_TRUE;
5069     mat->nonew   = 0;
5070   } else if (scall == MAT_REUSE_MATRIX) {
5071     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5072     ci = mat->i; cj = mat->j; cam = mat->a;
5073     for (i=0; i<am; i++) {
      /* off-diagonal portion of A with global columns < cstart */
5075       ncols_o = bi[i+1] - bi[i];
5076       for (jo=0; jo<ncols_o; jo++) {
5077         col = cmap[*bj];
5078         if (col >= cstart) break;
5079         *cam++ = *ba++; bj++;
5080       }
5081       /* diagonal portion of A */
5082       ncols_d = ai[i+1] - ai[i];
5083       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A with global columns >= cend */
5085       for (j=jo; j<ncols_o; j++) {
5086         *cam++ = *ba++; bj++;
5087       }
5088     }
5089   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5090   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and only the columns in which it has nonzeros
5096 
5097     Not Collective
5098 
5099    Input Parameters:
5100 +    A - the matrix
5101 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5102 -    row, col - index sets of rows and columns to extract (or NULL)
5103 
5104    Output Parameter:
5105 .    A_loc - the local sequential matrix generated
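
   Notes:
     Example usage (a minimal sketch; passing NULL for row and col selects all local rows and
     the columns in which this process has nonzeros):
.vb
       Mat A_loc;
       ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve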
5106 
5107     Level: developer
5108 
5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5110 
5111 @*/
5112 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5113 {
5114   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5115   PetscErrorCode ierr;
5116   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5117   IS             isrowa,iscola;
5118   Mat            *aloc;
5119   PetscBool      match;
5120 
5121   PetscFunctionBegin;
5122   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5123   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5124   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5125   if (!row) {
5126     start = A->rmap->rstart; end = A->rmap->rend;
5127     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5128   } else {
5129     isrowa = *row;
5130   }
5131   if (!col) {
5132     start = A->cmap->rstart;
5133     cmap  = a->garray;
5134     nzA   = a->A->cmap->n;
5135     nzB   = a->B->cmap->n;
5136     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5137     ncols = 0;
5138     for (i=0; i<nzB; i++) {
5139       if (cmap[i] < start) idx[ncols++] = cmap[i];
5140       else break;
5141     }
5142     imark = i;
5143     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5144     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5145     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5146   } else {
5147     iscola = *col;
5148   }
5149   if (scall != MAT_INITIAL_MATRIX) {
5150     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5151     aloc[0] = *A_loc;
5152   }
5153   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5154   if (!col) { /* attach global id of condensed columns */
5155     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5156   }
5157   *A_loc = aloc[0];
5158   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5159   if (!row) {
5160     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5161   }
5162   if (!col) {
5163     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5164   }
5165   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5166   PetscFunctionReturn(0);
5167 }
5168 
5169 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5171 
5172     Collective on Mat
5173 
5174    Input Parameters:
5175 +    A,B - the matrices in mpiaij format
5176 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5177 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5178 
5179    Output Parameter:
5180 +    rowb, colb - index sets of rows and columns of B to extract
5181 -    B_seq - the sequential matrix generated
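
   Notes:
     Example usage (a minimal sketch; the rowb, colb, and B_seq produced by the
     MAT_INITIAL_MATRIX call must be passed back unchanged for MAT_REUSE_MATRIX):
.vb
       IS  rowb = NULL,colb = NULL;
       Mat B_seq = NULL;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ... after the numerical values of B change ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve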
5182 
5183     Level: developer
5184 
5185 @*/
5186 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5187 {
5188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5189   PetscErrorCode ierr;
5190   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5191   IS             isrowb,iscolb;
5192   Mat            *bseq=NULL;
5193 
5194   PetscFunctionBegin;
5195   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5196     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5197   }
5198   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5199 
5200   if (scall == MAT_INITIAL_MATRIX) {
5201     start = A->cmap->rstart;
5202     cmap  = a->garray;
5203     nzA   = a->A->cmap->n;
5204     nzB   = a->B->cmap->n;
5205     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5206     ncols = 0;
5207     for (i=0; i<nzB; i++) {  /* row < local row index */
5208       if (cmap[i] < start) idx[ncols++] = cmap[i];
5209       else break;
5210     }
5211     imark = i;
5212     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5213     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5214     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5215     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5216   } else {
5217     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5218     isrowb  = *rowb; iscolb = *colb;
5219     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5220     bseq[0] = *B_seq;
5221   }
5222   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5223   *B_seq = bseq[0];
5224   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5225   if (!rowb) {
5226     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5227   } else {
5228     *rowb = isrowb;
5229   }
5230   if (!colb) {
5231     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5232   } else {
5233     *colb = iscolb;
5234   }
5235   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5236   PetscFunctionReturn(0);
5237 }
5238 
5239 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
5242 
5243     Collective on Mat
5244 
5245    Input Parameters:
5246 +    A,B - the matrices in mpiaij format
5247 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5248 
5249    Output Parameter:
5250 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5251 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5252 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5253 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5254 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
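
    Algorithm outline (MAT_INITIAL_MATRIX case): using the send/receive process lists of the
    VecScatter a->Mvctx, each process first exchanges the lengths of the needed rows of B
    (the i-array), then the column indices (the j-array), and finally the numerical values
    (the a-array); with MAT_REUSE_MATRIX only the a-array exchange is repeated.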
5257 
5258     Level: developer
5259 
5260 */
5261 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5262 {
5263   PetscErrorCode         ierr;
5264   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5265   Mat_SeqAIJ             *b_oth;
5266   VecScatter             ctx;
5267   MPI_Comm               comm;
5268   const PetscMPIInt      *rprocs,*sprocs;
5269   const PetscInt         *srow,*rstarts,*sstarts;
5270   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals;
5273   MPI_Request            *rwaits = NULL,*swaits = NULL;
5274   MPI_Status             rstatus;
5275   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5276 
5277   PetscFunctionBegin;
5278   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5279   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5280 
5281   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5283   }
5284   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5285   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5286 
  if (size == 1) {
    /* set the (optional) outputs through the pointers, not the local parameter copies */
    if (startsj_s) *startsj_s = NULL;
    if (startsj_r) *startsj_r = NULL;
    if (bufa_ptr)  *bufa_ptr  = NULL;
    *B_oth = NULL;
    ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); /* balance the event begun above */
    PetscFunctionReturn(0);
  }
5293 
5294   ctx = a->Mvctx;
5295   tag = ((PetscObject)ctx)->tag;
5296 
  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5298   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5299   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5300   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5301   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5302   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5303   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5304 
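  /* without the saved communication data the pattern cannot be reused; rebuild from scratch */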
5305   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5306   if (scall == MAT_INITIAL_MATRIX) {
5307     /* i-array */
5308     /*---------*/
5309     /*  post receives */
5310     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5311     for (i=0; i<nrecvs; i++) {
5312       rowlen = rvalues + rstarts[i]*rbs;
5313       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5314       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5315     }
5316 
5317     /* pack the outgoing message */
5318     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5319 
5320     sstartsj[0] = 0;
5321     rstartsj[0] = 0;
5322     len         = 0; /* total length of j or a array to be sent */
5323     if (nsends) {
5324       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5325       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5326     }
5327     for (i=0; i<nsends; i++) {
5328       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5329       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5330       for (j=0; j<nrows; j++) {
5331         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5332         for (l=0; l<sbs; l++) {
5333           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5334 
5335           rowlen[j*sbs+l] = ncols;
5336 
5337           len += ncols;
5338           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5339         }
5340         k++;
5341       }
5342       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5343 
5344       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5345     }
5346     /* recvs and sends of i-array are completed */
5347     i = nrecvs;
5348     while (i--) {
5349       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5350     }
5351     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5352     ierr = PetscFree(svalues);CHKERRQ(ierr);
5353 
5354     /* allocate buffers for sending j and a arrays */
5355     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5356     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5357 
5358     /* create i-array of B_oth */
5359     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5360 
5361     b_othi[0] = 0;
5362     len       = 0; /* total length of j or a array to be received */
5363     k         = 0;
5364     for (i=0; i<nrecvs; i++) {
5365       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5366       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5367       for (j=0; j<nrows; j++) {
5368         b_othi[k+1] = b_othi[k] + rowlen[j];
5369         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5370         k++;
5371       }
5372       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5373     }
5374     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5375 
    /* allocate space for the j and a arrays of B_oth */
5377     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5378     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5379 
5380     /* j-array */
5381     /*---------*/
5382     /*  post receives of j-array */
5383     for (i=0; i<nrecvs; i++) {
5384       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5385       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5386     }
5387 
5388     /* pack the outgoing message j-array */
5389     if (nsends) k = sstarts[0];
5390     for (i=0; i<nsends; i++) {
5391       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5392       bufJ  = bufj+sstartsj[i];
5393       for (j=0; j<nrows; j++) {
5394         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5395         for (ll=0; ll<sbs; ll++) {
5396           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5397           for (l=0; l<ncols; l++) {
5398             *bufJ++ = cols[l];
5399           }
5400           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5401         }
5402       }
5403       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5404     }
5405 
5406     /* recvs and sends of j-array are completed */
5407     i = nrecvs;
5408     while (i--) {
5409       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5410     }
5411     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5412   } else if (scall == MAT_REUSE_MATRIX) {
5413     sstartsj = *startsj_s;
5414     rstartsj = *startsj_r;
5415     bufa     = *bufa_ptr;
5416     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5417     b_otha   = b_oth->a;
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5419 
5420   /* a-array */
5421   /*---------*/
5422   /*  post receives of a-array */
5423   for (i=0; i<nrecvs; i++) {
5424     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5425     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5426   }
5427 
5428   /* pack the outgoing message a-array */
5429   if (nsends) k = sstarts[0];
5430   for (i=0; i<nsends; i++) {
5431     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5432     bufA  = bufa+sstartsj[i];
5433     for (j=0; j<nrows; j++) {
5434       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5435       for (ll=0; ll<sbs; ll++) {
5436         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5437         for (l=0; l<ncols; l++) {
5438           *bufA++ = vals[l];
5439         }
5440         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5441       }
5442     }
5443     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5444   }
5445   /* recvs and sends of a-array are completed */
5446   i = nrecvs;
5447   while (i--) {
5448     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5449   }
5450   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5451   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5452 
5453   if (scall == MAT_INITIAL_MATRIX) {
5454     /* put together the new matrix */
5455     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5456 
5457     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5458     /* Since these are PETSc arrays, change flags to free them as necessary. */
5459     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5460     b_oth->free_a  = PETSC_TRUE;
5461     b_oth->free_ij = PETSC_TRUE;
5462     b_oth->nonew   = 0;
5463 
5464     ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the output pointer */
5468     } else {
5469       *startsj_s = sstartsj;
5470       *startsj_r = rstartsj;
5471       *bufa_ptr  = bufa;
5472     }
5473   }
5474 
5475   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5476   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5477   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5478   PetscFunctionReturn(0);
5479 }
5480 
5481 /*@C
5482   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5483 
5484   Not Collective
5485 
  Input Parameter:
5487 . A - The matrix in mpiaij format
5488 
  Output Parameters:
5490 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5491 . colmap - A map from global column index to local index into lvec
5492 - multScatter - A scatter from the argument of a matrix-vector product to lvec
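
  Notes:
    Example usage (a minimal sketch; the type of colmap depends on whether PETSc was
    configured with ctables):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve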
5493 
5494   Level: developer
5495 
5496 @*/
5497 #if defined(PETSC_USE_CTABLE)
5498 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5499 #else
5500 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5501 #endif
5502 {
5503   Mat_MPIAIJ *a;
5504 
5505   PetscFunctionBegin;
5506   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5507   PetscValidPointer(lvec, 2);
5508   PetscValidPointer(colmap, 3);
5509   PetscValidPointer(multScatter, 4);
5510   a = (Mat_MPIAIJ*) A->data;
5511   if (lvec) *lvec = a->lvec;
5512   if (colmap) *colmap = a->colmap;
5513   if (multScatter) *multScatter = a->Mvctx;
5514   PetscFunctionReturn(0);
5515 }
5516 
5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5520 #if defined(PETSC_HAVE_MKL_SPARSE)
5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5522 #endif
5523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5524 #if defined(PETSC_HAVE_ELEMENTAL)
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_HYPRE)
5528 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5529 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5530 #endif
5531 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5533 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5534 
/*
    Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is not supported
5537 
5538                n                       p                          p
5539         (              )       (              )         (                  )
5540       m (      A       )  *  n (       B      )   =   m (         C        )
5541         (              )       (              )         (                  )
5542 
5543 */
5544 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5545 {
5546   PetscErrorCode ierr;
5547   Mat            At,Bt,Ct;
5548 
5549   PetscFunctionBegin;
5550   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5551   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5552   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5553   ierr = MatDestroy(&At);CHKERRQ(ierr);
5554   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5555   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5556   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5557   PetscFunctionReturn(0);
5558 }
5559 
5560 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5561 {
5562   PetscErrorCode ierr;
5563   PetscInt       m=A->rmap->n,n=B->cmap->n;
5564   Mat            Cmat;
5565 
5566   PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5568   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5569   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5570   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5571   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5572   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5573   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5574   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5575 
5576   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5577 
5578   *C = Cmat;
5579   PetscFunctionReturn(0);
5580 }
5581 
5582 /* ----------------------------------------------------------------*/
5583 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5584 {
5585   PetscErrorCode ierr;
5586 
5587   PetscFunctionBegin;
5588   if (scall == MAT_INITIAL_MATRIX) {
5589     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5590     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5591     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5592   }
5593   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5594   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5595   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5596   PetscFunctionReturn(0);
5597 }
5598 
5599 /*MC
5600    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5601 
5602    Options Database Keys:
5603 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
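
   Example (a minimal sketch of creating a MATMPIAIJ matrix directly; dnz and onz are
   caller-chosen per-row preallocation estimates for the diagonal and off-diagonal blocks):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve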
5604 
5605   Level: beginner
5606 
5607 .seealso: MatCreateAIJ()
5608 M*/
5609 
5610 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5611 {
5612   Mat_MPIAIJ     *b;
5613   PetscErrorCode ierr;
5614   PetscMPIInt    size;
5615 
5616   PetscFunctionBegin;
5617   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5618 
5619   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5620   B->data       = (void*)b;
5621   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5622   B->assembled  = PETSC_FALSE;
5623   B->insertmode = NOT_SET_VALUES;
5624   b->size       = size;
5625 
5626   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5627 
5628   /* build cache for off array entries formed */
5629   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5630 
5631   b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
5634   b->roworiented = PETSC_TRUE;
5635 
5636   /* stuff used for matrix vector multiply */
5637   b->lvec  = NULL;
5638   b->Mvctx = NULL;
5639 
5640   /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
5643   b->getrowactive = PETSC_FALSE;
5644 
5645   /* flexible pointer used in CUSP/CUSPARSE classes */
5646   b->spptr = NULL;
5647 
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5649   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5651   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5652   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5658 #if defined(PETSC_HAVE_MKL_SPARSE)
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5660 #endif
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5663 #if defined(PETSC_HAVE_ELEMENTAL)
5664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5665 #endif
5666 #if defined(PETSC_HAVE_HYPRE)
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5668 #endif
5669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5674 #if defined(PETSC_HAVE_HYPRE)
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5676 #endif
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5679   PetscFunctionReturn(0);
5680 }
5681 
5682 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5684          and "off-diagonal" part of the matrix in CSR format.
5685 
5686    Collective on MPI_Comm
5687 
5688    Input Parameters:
5689 +  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns; this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated
       if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of the matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of the matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values
5702 
5703    Output Parameter:
5704 .   mat - the matrix
5705 
5706    Level: advanced
5707 
5708    Notes:
5709        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5710        must free the arrays once the matrix has been destroyed and not before.
5711 
5712        The i and j indices are 0 based
5713 
5714        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
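
       As an illustration (a hypothetical layout, not taken from an actual run): suppose this
       process owns rows 0-1 and columns 0-1 of the global matrix
.vb
         1 2 0 3
         0 4 5 0
.ve
       Then the "diagonal" block holds the entries 1, 2, 4 and is described by i = {0,2,3},
       j = {0,1,1}, a = {1,2,4} (j holds local column indices), while the "off-diagonal" block
       holds the entries 3, 5 and is described by oi = {0,1,2}, oj = {3,2}, oa = {3,5}; oj holds
       global column indices because the off-diagonal block is created with the full global
       column count (see the MatCreateSeqAIJWithArrays() calls in the implementation).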
5715 
5716        This sets local rows and cannot be used to set off-processor values.
5717 
5718        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5719        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5720        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5721        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5722        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5723        communication if it is known that only local entries will be set.
5724 
5725 .keywords: matrix, aij, compressed row, sparse, parallel
5726 
5727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5728           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5729 @*/
5730 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5731 {
5732   PetscErrorCode ierr;
5733   Mat_MPIAIJ     *maij;
5734 
5735   PetscFunctionBegin;
5736   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5737   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5738   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5739   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5740   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5741   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5742   maij = (Mat_MPIAIJ*) (*mat)->data;
5743 
5744   (*mat)->preallocated = PETSC_TRUE;
5745 
5746   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5747   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5748 
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->n,i,j,a,&maij->A);CHKERRQ(ierr); /* use the layout's local column count in case n was PETSC_DECIDE */
5750   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5751 
5752   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5754   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5755   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5756 
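  /* All entries are local, so assembly of the parallel matrix need not communicate stashed
     values; it only needs to build the column map and scatter for matrix-vector products */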
5757   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5758   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5759   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5760   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5761   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5762   PetscFunctionReturn(0);
5763 }
5764 
5765 /*
5766     Special version for direct calls from Fortran
5767 */
5768 #include <petsc/private/fortranimpl.h>
5769 
/* Change these macros so they can be used in a void function */
5771 #undef CHKERRQ
5772 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5773 #undef SETERRQ2
5774 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5775 #undef SETERRQ3
5776 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5777 #undef SETERRQ
5778 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5779 
5780 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5781 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5782 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5783 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5784 #else
5785 #endif
5786 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5787 {
5788   Mat            mat  = *mmat;
5789   PetscInt       m    = *mm, n = *mn;
5790   InsertMode     addv = *maddv;
5791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5792   PetscScalar    value;
5793   PetscErrorCode ierr;
5794 
5795   MatCheckPreallocated(mat,1);
5796   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5797 
5798 #if defined(PETSC_USE_DEBUG)
5799   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5800 #endif
5801   {
5802     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5803     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5804     PetscBool roworiented = aij->roworiented;
5805 
5806     /* Some Variables required in the macro */
5807     Mat        A                 = aij->A;
5808     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5809     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5810     MatScalar  *aa               = a->a;
5811     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5812     Mat        B                 = aij->B;
5813     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5814     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5815     MatScalar  *ba               = b->a;
5816 
5817     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5818     PetscInt  nonew = a->nonew;
5819     MatScalar *ap1,*ap2;
5820 
5821     PetscFunctionBegin;
5822     for (i=0; i<m; i++) {
5823       if (im[i] < 0) continue;
5824 #if defined(PETSC_USE_DEBUG)
5825       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5826 #endif
5827       if (im[i] >= rstart && im[i] < rend) {
5828         row      = im[i] - rstart;
5829         lastcol1 = -1;
5830         rp1      = aj + ai[row];
5831         ap1      = aa + ai[row];
5832         rmax1    = aimax[row];
5833         nrow1    = ailen[row];
5834         low1     = 0;
5835         high1    = nrow1;
5836         lastcol2 = -1;
5837         rp2      = bj + bi[row];
5838         ap2      = ba + bi[row];
5839         rmax2    = bimax[row];
5840         nrow2    = bilen[row];
5841         low2     = 0;
5842         high2    = nrow2;
5843 
5844         for (j=0; j<n; j++) {
5845           if (roworiented) value = v[i*n+j];
5846           else value = v[i+j*m];
5847           if (in[j] >= cstart && in[j] < cend) {
5848             col = in[j] - cstart;
5849             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5850             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5851           } else if (in[j] < 0) continue;
5852 #if defined(PETSC_USE_DEBUG)
5853           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5854           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5855 #endif
5856           else {
5857             if (mat->was_assembled) {
5858               if (!aij->colmap) {
5859                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5860               }
5861 #if defined(PETSC_USE_CTABLE)
5862               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5863               col--;
5864 #else
5865               col = aij->colmap[in[j]] - 1;
5866 #endif
5867               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5868               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5869                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5870                 col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from b->a before ap2 is computed because
                   MatDisAssemble_MPIAIJ() replaced B and its internal arrays */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
5883               }
5884             } else col = in[j];
5885             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5886           }
5887         }
5888       } else if (!aij->donotstash) {
5889         if (roworiented) {
5890           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5891         } else {
5892           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5893         }
5894       }
5895     }
5896   }
5897   PetscFunctionReturnVoid();
5898 }
5899