/* petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bfcb38ea38335faa6e7f8d97f6bc6ce9aa2a1dd1) */

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity, as in the example below.
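
   Example Usage (a minimal sketch; comm, m, n, nz, dnz, and onz are placeholder values, not part of this file):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,nz,NULL);            /* takes effect when the communicator has one process */
     MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);  /* takes effect when it has more than one */
.ve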

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to using inodes when enough rows with identical nonzero structure exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool         cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

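/*
   Computes the requested norm of every global column of the matrix: each process
   accumulates the contributions of its diagonal (A) and off-diagonal (B) blocks
   into a work array of global length n, mapping B's compact column numbering back
   to global columns through garray, and the work arrays are then combined with a
   single MPI reduction (a sum, or a max for the infinity norm).
*/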
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
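/*
   A usage sketch (illustrative only; m must be the number of local rows each
   process is to own, and gmat needs to be a fully assembled SeqAIJ matrix on
   process 0 of comm):

      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);    <- moves over new numerical values only
*/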
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  stores an integer array as long as the global number of columns) but it is fast to access.
*/
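/*
   For example, if the off-diagonal block B references global columns
   garray = {3,8,11}, the colmap answers "global column -> local column of B
   plus one": colmap[3] = 1, colmap[8] = 2, colmap[11] = 3, and 0 everywhere
   else, so that a zero result can signal "column not present".
*/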
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

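/*
   The two macros below insert a single (row,col,value) into the diagonal (A) or
   off-diagonal (B) SeqAIJ block during MatSetValues_MPIAIJ(): a short binary
   search narrows down the position, a linear scan finishes it, and when the
   column is not yet present the later entries of the row are shifted up (with
   MatSeqXAIJReallocateAIJ() growing the row if needed), honoring the nonew and
   ignorezeroentries options.
*/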
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
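/*
   A small example of the expected input (assuming cstart = 4 and cend = 8 on
   this process): the local CSR arrays

      mat_i = {0,3,5}   mat_j = {1,4,7,5,9}

   describe two rows; row 0 places global columns 4 and 7 into the diagonal
   block (as local columns 0 and 3) and column 1 into the off-diagonal block,
   giving ailen[0] = 2 and bilen[0] = 1.
*/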
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
  Mat            A           = aij->A; /* diagonal part of the matrix */
  Mat            B           = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen      = a->ilen,*aj = a->j;
  PetscInt       *bilen      = b->ilen,*bj = b->j;
  PetscInt       am          = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
  Mat            A      = aij->A; /* diagonal part of the matrix */
  Mat            B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar    *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

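/*
   Parallel sparse matrix-vector product y = A*x: while the needed off-process
   entries of x are scattered into the ghost vector lvec, the diagonal block is
   applied to the local part of x; the off-diagonal block's contribution is then
   added once the scatter completes, overlapping communication with computation.
*/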
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
1320   nzmax = nz; /* on rank 0 this is overwritten below with the maximum nz over all processes; other ranks only need their own nz */
1321   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1322   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1323   cnt   = 0;
1324   for (i=0; i<mat->rmap->n; i++) {
1325     for (j=B->i[i]; j<B->i[i+1]; j++) {
1326       if ((col = garray[B->j[j]]) > cstart) break;
1327       column_indices[cnt++] = col;
1328     }
1329     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1330     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1331   }
1332   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1333 
1334   /* store the column indices to the file */
1335   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1336   if (!rank) {
1337     MPI_Status status;
1338     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     for (i=1; i<size; i++) {
1340       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1341       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1342       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1343       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1344       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1345     }
1346     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1347   } else {
1348     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1349     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1350     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1351     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1352   }
1353   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1354 
1355   /* load up the local column values */
1356   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1357   cnt  = 0;
1358   for (i=0; i<mat->rmap->n; i++) {
1359     for (j=B->i[i]; j<B->i[i+1]; j++) {
1360       if (garray[B->j[j]] > cstart) break;
1361       column_values[cnt++] = B->a[j];
1362     }
1363     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1364     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1365   }
1366   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1367 
1368   /* store the column values to the file */
1369   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1370   if (!rank) {
1371     MPI_Status status;
1372     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1373     for (i=1; i<size; i++) {
1374       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1375       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1376       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1377       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1378       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1381   } else {
1382     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1383     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1384     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1385     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1386   }
1387   ierr = PetscFree(column_values);CHKERRQ(ierr);
1388 
1389   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1390   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1391   PetscFunctionReturn(0);
1392 }
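
/*
   Usage sketch (not part of this file; A and "matrix.dat" are placeholders): on more than
   one process, saving an MPIAIJ matrix with a binary viewer reaches MatView_MPIAIJ_Binary()
   above, which writes the header [MAT_FILE_CLASSID, M, N, nnz], the row lengths, the global
   column indices, and finally the values.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/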
1393 
1394 #include <petscdraw.h>
1395 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1396 {
1397   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1398   PetscErrorCode    ierr;
1399   PetscMPIInt       rank = aij->rank,size = aij->size;
1400   PetscBool         isdraw,iascii,isbinary;
1401   PetscViewer       sviewer;
1402   PetscViewerFormat format;
1403 
1404   PetscFunctionBegin;
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1408   if (iascii) {
1409     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1410     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1411       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1412       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1413       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1414       for (i=0; i<(PetscInt)size; i++) {
1415         nmax = PetscMax(nmax,nz[i]);
1416         nmin = PetscMin(nmin,nz[i]);
1417         navg += nz[i];
1418       }
1419       ierr = PetscFree(nz);CHKERRQ(ierr);
1420       navg = navg/size;
1421       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1422       PetscFunctionReturn(0);
1423     }
1424     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1425     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1426       MatInfo   info;
1427       PetscBool inodes;
1428 
1429       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1430       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1431       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1433       if (!inodes) {
1434         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1435                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1436       } else {
1437         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1438                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1439       }
1440       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1442       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1444       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1445       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1446       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1447       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1448       PetscFunctionReturn(0);
1449     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1450       PetscInt inodecount,inodelimit,*inodes;
1451       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1452       if (inodes) {
1453         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1454       } else {
1455         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1456       }
1457       PetscFunctionReturn(0);
1458     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1459       PetscFunctionReturn(0);
1460     }
1461   } else if (isbinary) {
1462     if (size == 1) {
1463       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1464       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1465     } else {
1466       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1467     }
1468     PetscFunctionReturn(0);
1469   } else if (iascii && size == 1) {
1470     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1471     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1472     PetscFunctionReturn(0);
1473   } else if (isdraw) {
1474     PetscDraw draw;
1475     PetscBool isnull;
1476     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1477     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1478     if (isnull) PetscFunctionReturn(0);
1479   }
1480 
1481   { /* assemble the entire matrix onto first processor */
1482     Mat A = NULL, Av;
1483     IS  isrow,iscol;
1484 
1485     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1486     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1487     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1488     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1489 /* The commented-out code below uses MatCreateSubMatrices() instead */
1490 /*
1491     Mat *AA, A = NULL, Av;
1492     IS  isrow,iscol;
1493 
1494     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1495     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1496     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1497     if (!rank) {
1498        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1499        A    = AA[0];
1500        Av   = AA[0];
1501     }
1502     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1503 */
1504     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1505     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1506     /*
1507        Everyone has to participate in drawing the matrix, since the graphics waits are
1508        synchronized across all processes that share the PetscDraw object
1509     */
1510     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1511     if (!rank) {
1512       if (((PetscObject)mat)->name) {
1513         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1514       }
1515       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1516     }
1517     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1518     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(&A);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1525 {
1526   PetscErrorCode ierr;
1527   PetscBool      iascii,isdraw,issocket,isbinary;
1528 
1529   PetscFunctionBegin;
1530   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1531   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1532   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1533   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1534   if (iascii || isdraw || isbinary || issocket) {
1535     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1541 {
1542   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1543   PetscErrorCode ierr;
1544   Vec            bb1 = 0;
1545   PetscBool      hasop;
1546 
1547   PetscFunctionBegin;
1548   if (flag == SOR_APPLY_UPPER) {
1549     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550     PetscFunctionReturn(0);
1551   }
1552 
1553   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1554     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1555   }
1556 
1557   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1558     if (flag & SOR_ZERO_INITIAL_GUESS) {
1559       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1560       its--;
1561     }
1562 
1563     while (its--) {
1564       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1565       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566 
1567       /* update rhs: bb1 = bb - B*x */
1568       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1569       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1570 
1571       /* local sweep */
1572       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1573     }
1574   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1575     if (flag & SOR_ZERO_INITIAL_GUESS) {
1576       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1577       its--;
1578     }
1579     while (its--) {
1580       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1581       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582 
1583       /* update rhs: bb1 = bb - B*x */
1584       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1585       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1586 
1587       /* local sweep */
1588       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1589     }
1590   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1591     if (flag & SOR_ZERO_INITIAL_GUESS) {
1592       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1593       its--;
1594     }
1595     while (its--) {
1596       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598 
1599       /* update rhs: bb1 = bb - B*x */
1600       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1601       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1602 
1603       /* local sweep */
1604       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1605     }
1606   } else if (flag & SOR_EISENSTAT) {
1607     Vec xx1;
1608 
1609     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1610     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1611 
1612     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614     if (!mat->diag) {
1615       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1616       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1617     }
1618     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1619     if (hasop) {
1620       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1621     } else {
1622       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1623     }
1624     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1625 
1626     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1627 
1628     /* local sweep */
1629     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1630     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1631     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1632   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1633 
1634   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1635 
1636   matin->factorerrortype = mat->A->factorerrortype;
1637   PetscFunctionReturn(0);
1638 }
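
/*
   Usage sketch (assumed context, not from this file): the processor-local sweeps above are
   normally reached through PCSOR, e.g.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);

   or -pc_type sor on the command line. Only the SOR_LOCAL_* variants (plus Eisenstat) are
   supported in parallel; a true global sweep triggers the SETERRQ() above.
*/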
1639 
1640 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1641 {
1642   Mat            aA,aB,Aperm;
1643   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1644   PetscScalar    *aa,*ba;
1645   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1646   PetscSF        rowsf,sf;
1647   IS             parcolp = NULL;
1648   PetscBool      done;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1653   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1654   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1655   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1656 
1657   /* Invert row permutation to find out where my rows should go */
1658   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1659   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1660   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1661   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1662   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1663   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1664 
1665   /* Invert column permutation to find out where my columns should go */
1666   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1667   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1668   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1669   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1670   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1671   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1672   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1673 
1674   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1675   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1676   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1677 
1678   /* Find out where my gcols should go */
1679   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1680   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1682   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1683   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1684   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1685   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1686   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1687 
1688   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1689   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1690   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1691   for (i=0; i<m; i++) {
1692     PetscInt row = rdest[i],rowner;
1693     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1694     for (j=ai[i]; j<ai[i+1]; j++) {
1695       PetscInt cowner,col = cdest[aj[j]];
1696       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1697       if (rowner == cowner) dnnz[i]++;
1698       else onnz[i]++;
1699     }
1700     for (j=bi[i]; j<bi[i+1]; j++) {
1701       PetscInt cowner,col = gcdest[bj[j]];
1702       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1703       if (rowner == cowner) dnnz[i]++;
1704       else onnz[i]++;
1705     }
1706   }
1707   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1708   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1709   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1710   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1711   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1712 
1713   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1714   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1715   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1716   for (i=0; i<m; i++) {
1717     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1718     PetscInt j0,rowlen;
1719     rowlen = ai[i+1] - ai[i];
1720     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the number of rows m (the size of the scratch arrays), so insert in batches of at most m */
1721       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1722       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1723     }
1724     rowlen = bi[i+1] - bi[i];
1725     for (j0=j=0; j<rowlen; j0=j) {
1726       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1727       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1728     }
1729   }
1730   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1731   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1733   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1734   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1735   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1736   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1737   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1738   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1739   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1740   *B = Aperm;
1741   PetscFunctionReturn(0);
1742 }
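
/*
   Usage sketch (names are placeholders): MatPermute() creates a new matrix with the rows
   and columns of A reordered according to the index sets rowperm and colperm, whose global
   sizes must match the number of rows and columns of A, respectively.

     Mat B;
     ierr = MatPermute(A,rowperm,colperm,&B);CHKERRQ(ierr);
*/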
1743 
1744 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1745 {
1746   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1751   if (ghosts) *ghosts = aij->garray;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1756 {
1757   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1758   Mat            A    = mat->A,B = mat->B;
1759   PetscErrorCode ierr;
1760   PetscReal      isend[5],irecv[5];
1761 
1762   PetscFunctionBegin;
1763   info->block_size = 1.0;
1764   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1765 
1766   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1767   isend[3] = info->memory;  isend[4] = info->mallocs;
1768 
1769   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1770 
1771   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1772   isend[3] += info->memory;  isend[4] += info->mallocs;
1773   if (flag == MAT_LOCAL) {
1774     info->nz_used      = isend[0];
1775     info->nz_allocated = isend[1];
1776     info->nz_unneeded  = isend[2];
1777     info->memory       = isend[3];
1778     info->mallocs      = isend[4];
1779   } else if (flag == MAT_GLOBAL_MAX) {
1780     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1781 
1782     info->nz_used      = irecv[0];
1783     info->nz_allocated = irecv[1];
1784     info->nz_unneeded  = irecv[2];
1785     info->memory       = irecv[3];
1786     info->mallocs      = irecv[4];
1787   } else if (flag == MAT_GLOBAL_SUM) {
1788     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1789 
1790     info->nz_used      = irecv[0];
1791     info->nz_allocated = irecv[1];
1792     info->nz_unneeded  = irecv[2];
1793     info->memory       = irecv[3];
1794     info->mallocs      = irecv[4];
1795   }
1796   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1797   info->fill_ratio_needed = 0;
1798   info->factor_mallocs    = 0;
1799   PetscFunctionReturn(0);
1800 }
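
/*
   Usage sketch (A is a placeholder): the flag selects the local values or an MPI reduction
   over all ranks, as implemented above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"total nonzeros used: %g\n",info.nz_used);CHKERRQ(ierr);
*/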
1801 
1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1803 {
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   switch (op) {
1809   case MAT_NEW_NONZERO_LOCATIONS:
1810   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1811   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1812   case MAT_KEEP_NONZERO_PATTERN:
1813   case MAT_NEW_NONZERO_LOCATION_ERR:
1814   case MAT_USE_INODES:
1815   case MAT_IGNORE_ZERO_ENTRIES:
1816     MatCheckPreallocated(A,1);
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1819     break;
1820   case MAT_ROW_ORIENTED:
1821     MatCheckPreallocated(A,1);
1822     a->roworiented = flg;
1823 
1824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1825     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1826     break;
1827   case MAT_NEW_DIAGONALS:
1828     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1829     break;
1830   case MAT_IGNORE_OFF_PROC_ENTRIES:
1831     a->donotstash = flg;
1832     break;
1833   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1834   case MAT_SPD:
1835   case MAT_SYMMETRIC:
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837   case MAT_HERMITIAN:
1838   case MAT_SYMMETRY_ETERNAL:
1839     break;
1840   case MAT_SUBMAT_SINGLEIS:
1841     A->submat_singleis = flg;
1842     break;
1843   case MAT_STRUCTURE_ONLY:
1844     /* The option is handled directly by MatSetOption() */
1845     break;
1846   default:
1847     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) max = tmp;
1874     }
1875     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz  = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
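
/*
   Usage sketch: MatGetRow()/MatRestoreRow() must be called in pairs, one row at a time, and
   only for locally owned rows (see the range check in MatGetRow_MPIAIJ() above). Names are
   placeholders.

     PetscInt          row,ncols,rstart,rend;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols, vals ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/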
1939 
1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1941 {
1942   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1943   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1944   PetscErrorCode ierr;
1945   PetscInt       i,j,cstart = mat->cmap->rstart;
1946   PetscReal      sum = 0.0;
1947   MatScalar      *v;
1948 
1949   PetscFunctionBegin;
1950   if (aij->size == 1) {
1951     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1952   } else {
1953     if (type == NORM_FROBENIUS) {
1954       v = amat->a;
1955       for (i=0; i<amat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1961       }
1962       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1963       *norm = PetscSqrtReal(*norm);
1964       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1965     } else if (type == NORM_1) { /* max column norm */
1966       PetscReal *tmp,*tmp2;
1967       PetscInt  *jj,*garray = aij->garray;
1968       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1969       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1970       *norm = 0.0;
1971       v     = amat->a; jj = amat->j;
1972       for (j=0; j<amat->nz; j++) {
1973         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1974       }
1975       v = bmat->a; jj = bmat->j;
1976       for (j=0; j<bmat->nz; j++) {
1977         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1978       }
1979       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1980       for (j=0; j<mat->cmap->N; j++) {
1981         if (tmp2[j] > *norm) *norm = tmp2[j];
1982       }
1983       ierr = PetscFree(tmp);CHKERRQ(ierr);
1984       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1985       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1986     } else if (type == NORM_INFINITY) { /* max row norm */
1987       PetscReal ntemp = 0.0;
1988       for (j=0; j<aij->A->rmap->n; j++) {
1989         v   = amat->a + amat->i[j];
1990         sum = 0.0;
1991         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         v = bmat->a + bmat->i[j];
1995         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         if (sum > ntemp) ntemp = sum;
1999       }
2000       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2001       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2002     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2003   }
2004   PetscFunctionReturn(0);
2005 }
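
/*
   The three supported norms, in the notation of the code above:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), local partial sums combined with MPIU_SUM
     NORM_1:         max_j sum_i |a_ij| (max column sum), column sums combined with MPIU_SUM
     NORM_INFINITY:  max_i sum_j |a_ij| (max row sum), local maxima combined with MPIU_MAX
   NORM_2 would require an eigenvalue computation and is rejected with PETSC_ERR_SUP.
*/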
2006 
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
2010   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2011   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
2012   PetscErrorCode ierr;
2013   Mat            B,A_diag,*B_diag;
2014   MatScalar      *array;
2015 
2016   PetscFunctionBegin;
2017   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2018   ai = Aloc->i; aj = Aloc->j;
2019   bi = Bloc->i; bj = Bloc->j;
2020   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2021     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2022     PetscSFNode          *oloc;
2023     PETSC_UNUSED PetscSF sf;
2024 
2025     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2026     /* compute d_nnz for preallocation */
2027     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2028     for (i=0; i<ai[ma]; i++) {
2029       d_nnz[aj[i]]++;
2030     }
2031     /* compute local off-diagonal contributions */
2032     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2033     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2034     /* map those to global */
2035     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2036     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2037     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2038     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2039     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2040     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2041     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2042 
2043     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2044     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2045     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2046     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2047     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2048     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2049   } else {
2050     B    = *matout;
2051     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2052   }
2053 
2054   b           = (Mat_MPIAIJ*)B->data;
2055   A_diag      = a->A;
2056   B_diag      = &b->A;
2057   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2058   A_diag_ncol = A_diag->cmap->N;
2059   B_diag_ilen = sub_B_diag->ilen;
2060   B_diag_i    = sub_B_diag->i;
2061 
2062   /* Set ilen for diagonal of B */
2063   for (i=0; i<A_diag_ncol; i++) {
2064     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2065   }
2066 
2067   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2068      very quickly (i.e., without using MatSetValues()) because all writes are local. */
2069   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2070 
2071   /* copy over the B part */
2072   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2073   array = Bloc->a;
2074   row   = A->rmap->rstart;
2075   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2076   cols_tmp = cols;
2077   for (i=0; i<mb; i++) {
2078     ncol = bi[i+1]-bi[i];
2079     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2080     row++;
2081     array += ncol; cols_tmp += ncol;
2082   }
2083   ierr = PetscFree(cols);CHKERRQ(ierr);
2084 
2085   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2086   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2088     *matout = B;
2089   } else {
2090     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2091   }
2092   PetscFunctionReturn(0);
2093 }
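
/*
   Usage sketch (A and At are placeholders):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   MAT_REUSE_MATRIX reuses a previously created At with the transposed nonzero pattern, and
   an in-place transpose overwrites A itself (the MatHeaderMerge() branch above).
*/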
2094 
2095 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2096 {
2097   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2098   Mat            a    = aij->A,b = aij->B;
2099   PetscErrorCode ierr;
2100   PetscInt       s1,s2,s3;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2104   if (rr) {
2105     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2106     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2107     /* Overlap communication with computation. */
2108     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2109   }
2110   if (ll) {
2111     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2112     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2113     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2114   }
2115   /* scale the diagonal block */
2116   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2117 
2118   if (rr) {
2119     /* Do a scatter end and then right scale the off-diagonal block */
2120     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2121     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2127 {
2128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2137 {
2138   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2139   Mat            a,b,c,d;
2140   PetscBool      flg;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   a = matA->A; b = matA->B;
2145   c = matB->A; d = matB->B;
2146 
2147   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2148   if (flg) {
2149     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2150   }
2151   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2156 {
2157   PetscErrorCode ierr;
2158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2159   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2160 
2161   PetscFunctionBegin;
2162   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2163   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2164     /* because of the column compression in the off-processor part of the matrix a->B,
2165        the number of columns in a->B and b->B may be different, hence we cannot call
2166        the MatCopy() directly on the two parts. If need be, we can provide a more
2167        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2168        then copying the submatrices */
2169     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2170   } else {
2171     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2172     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2173   }
2174   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 /*
2188    Computes the number of nonzeros per row needed for preallocation when X and Y
2189    have different nonzero structure.
2190 */
2191 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2192 {
2193   PetscInt       i,j,k,nzx,nzy;
2194 
2195   PetscFunctionBegin;
2196   /* Set the number of nonzeros in the new matrix */
2197   for (i=0; i<m; i++) {
2198     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2199     nzx = xi[i+1] - xi[i];
2200     nzy = yi[i+1] - yi[i];
2201     nnz[i] = 0;
2202     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2203       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2204       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2205       nnz[i]++;
2206     }
2207     for (; k<nzy; k++) nnz[i]++;
2208   }
2209   PetscFunctionReturn(0);
2210 }
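
/*
   Worked example of the merge above: if row i of X has global columns {0,3,7} and row i of
   Y has {3,5}, then nnz[i] = 4 for the union {0,3,5,7}: column 0 comes from X, column 3 is
   counted once (the duplicate in Y is skipped), column 5 is caught up from Y, and column 7
   comes from X.
*/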
2211 
2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2214 {
2215   PetscErrorCode ierr;
2216   PetscInt       m = Y->rmap->N;
2217   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2218   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2219 
2220   PetscFunctionBegin;
2221   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2226 {
2227   PetscErrorCode ierr;
2228   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2229   PetscBLASInt   bnz,one=1;
2230   Mat_SeqAIJ     *x,*y;
2231 
2232   PetscFunctionBegin;
2233   if (str == SAME_NONZERO_PATTERN) {
2234     PetscScalar alpha = a;
2235     x    = (Mat_SeqAIJ*)xx->A->data;
2236     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2237     y    = (Mat_SeqAIJ*)yy->A->data;
2238     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2239     x    = (Mat_SeqAIJ*)xx->B->data;
2240     y    = (Mat_SeqAIJ*)yy->B->data;
2241     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2242     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2243     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2244   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2245     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2246   } else {
2247     Mat      B;
2248     PetscInt *nnz_d,*nnz_o;
2249     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2250     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2251     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2252     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2253     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2254     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2255     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2256     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2257     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2258     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2259     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2260     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2261     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2262     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2263   }
2264   PetscFunctionReturn(0);
2265 }
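
/*
   Usage sketch (Y and X are placeholders): Y = Y + a*X via

     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN takes the fast BLAS axpy path above; DIFFERENT_NONZERO_PATTERN builds
   a new preallocation from the merged patterns and replaces Y's internals with MatHeaderReplace().
*/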
2266 
2267 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2268 
2269 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2270 {
2271 #if defined(PETSC_USE_COMPLEX)
2272   PetscErrorCode ierr;
2273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2277   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2278 #else
2279   PetscFunctionBegin;
2280 #endif
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2296 {
2297   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2302   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2307 {
2308   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2309   PetscErrorCode ierr;
2310   PetscInt       i,*idxb = 0;
2311   PetscScalar    *va,*vb;
2312   Vec            vtmp;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2316   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2317   if (idx) {
2318     for (i=0; i<A->rmap->n; i++) {
2319       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2320     }
2321   }
2322 
2323   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2324   if (idx) {
2325     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2326   }
2327   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2328   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2329 
2330   for (i=0; i<A->rmap->n; i++) {
2331     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2332       va[i] = vb[i];
2333       if (idx) idx[i] = a->garray[idxb[i]];
2334     }
2335   }
2336 
2337   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2338   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2339   ierr = PetscFree(idxb);CHKERRQ(ierr);
2340   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2345 {
2346   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2347   PetscErrorCode ierr;
2348   PetscInt       i,*idxb = 0;
2349   PetscScalar    *va,*vb;
2350   Vec            vtmp;
2351 
2352   PetscFunctionBegin;
2353   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2354   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2355   if (idx) {
2356     for (i=0; i<A->rmap->n; i++) {
2357       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2358     }
2359   }
2360 
2361   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2362   if (idx) {
2363     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2364   }
2365   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2366   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2367 
2368   for (i=0; i<A->rmap->n; i++) {
2369     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2370       va[i] = vb[i];
2371       if (idx) idx[i] = a->garray[idxb[i]];
2372     }
2373   }
2374 
2375   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2377   ierr = PetscFree(idxb);CHKERRQ(ierr);
2378   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2383 {
2384   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2385   PetscInt       n      = A->rmap->n;
2386   PetscInt       cstart = A->cmap->rstart;
2387   PetscInt       *cmap  = mat->garray;
2388   PetscInt       *diagIdx, *offdiagIdx;
2389   Vec            diagV, offdiagV;
2390   PetscScalar    *a, *diagA, *offdiagA;
2391   PetscInt       r;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2396   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2397   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2398   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2399   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2401   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2402   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2403   for (r = 0; r < n; ++r) {
2404     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2405       a[r]   = diagA[r];
2406       idx[r] = cstart + diagIdx[r];
2407     } else {
2408       a[r]   = offdiagA[r];
2409       idx[r] = cmap[offdiagIdx[r]];
2410     }
2411   }
2412   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2413   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2414   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2415   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2416   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2417   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2424   PetscInt       n      = A->rmap->n;
2425   PetscInt       cstart = A->cmap->rstart;
2426   PetscInt       *cmap  = mat->garray;
2427   PetscInt       *diagIdx, *offdiagIdx;
2428   Vec            diagV, offdiagV;
2429   PetscScalar    *a, *diagA, *offdiagA;
2430   PetscInt       r;
2431   PetscErrorCode ierr;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2461 {
2462   PetscErrorCode ierr;
2463   Mat            *dummy;
2464 
2465   PetscFunctionBegin;
2466   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2467   *newmat = *dummy;
2468   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2473 {
2474   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2479   A->factorerrortype = a->A->factorerrortype;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2484 {
2485   PetscErrorCode ierr;
2486   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2487 
2488   PetscFunctionBegin;
2489   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2490   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2491   if (x->assembled) {
2492     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2493   } else {
2494     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2495   }
2496   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2497   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2502 {
2503   PetscFunctionBegin;
2504   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2505   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 /*@
2510    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2511 
2512    Collective on Mat
2513 
2514    Input Parameters:
2515 +    A - the matrix
2516 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2517 
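   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap (read by MatSetFromOptions_MPIAIJ() below)
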
2518    Level: advanced
2519 
2520 @*/
2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2522 {
2523   PetscErrorCode       ierr;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2531 {
2532   PetscErrorCode       ierr;
2533   PetscBool            sc = PETSC_FALSE,flg;
2534 
2535   PetscFunctionBegin;
2536   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2537   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2538   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2539   if (flg) {
2540     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2541   }
2542   ierr = PetscOptionsTail();CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2547 {
2548   PetscErrorCode ierr;
2549   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2550   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2551 
2552   PetscFunctionBegin;
2553   if (!Y->preallocated) {
2554     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2555   } else if (!aij->nz) {
2556     PetscInt nonew = aij->nonew;
2557     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2558     aij->nonew = nonew;
2559   }
2560   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
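
/*
   Usage sketch: the implementation above makes MatShift() safe even when the
   diagonal has not been preallocated, so computing Y <- Y + shift*I is a single
   call (the helper name and 'shift' value are illustrative):
*/
#include <petscmat.h>

static PetscErrorCode AddDiagonalShift(Mat Y,PetscScalar shift)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatShift(Y,shift);CHKERRQ(ierr); /* Y <- Y + shift*I */
  PetscFunctionReturn(0);
}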
2563 
2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2565 {
2566   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2571   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2572   if (d) {
2573     PetscInt rstart;
2574     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2575     *d += rstart;
2576 
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2582 {
2583   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2584   PetscErrorCode ierr;
2585 
2586   PetscFunctionBegin;
2587   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 /* -------------------------------------------------------------------*/
2592 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2593                                        MatGetRow_MPIAIJ,
2594                                        MatRestoreRow_MPIAIJ,
2595                                        MatMult_MPIAIJ,
2596                                 /* 4*/ MatMultAdd_MPIAIJ,
2597                                        MatMultTranspose_MPIAIJ,
2598                                        MatMultTransposeAdd_MPIAIJ,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*10*/ 0,
2603                                        0,
2604                                        0,
2605                                        MatSOR_MPIAIJ,
2606                                        MatTranspose_MPIAIJ,
2607                                 /*15*/ MatGetInfo_MPIAIJ,
2608                                        MatEqual_MPIAIJ,
2609                                        MatGetDiagonal_MPIAIJ,
2610                                        MatDiagonalScale_MPIAIJ,
2611                                        MatNorm_MPIAIJ,
2612                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2613                                        MatAssemblyEnd_MPIAIJ,
2614                                        MatSetOption_MPIAIJ,
2615                                        MatZeroEntries_MPIAIJ,
2616                                 /*24*/ MatZeroRows_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*29*/ MatSetUp_MPIAIJ,
2622                                        0,
2623                                        0,
2624                                        MatGetDiagonalBlock_MPIAIJ,
2625                                        0,
2626                                 /*34*/ MatDuplicate_MPIAIJ,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                 /*39*/ MatAXPY_MPIAIJ,
2632                                        MatCreateSubMatrices_MPIAIJ,
2633                                        MatIncreaseOverlap_MPIAIJ,
2634                                        MatGetValues_MPIAIJ,
2635                                        MatCopy_MPIAIJ,
2636                                 /*44*/ MatGetRowMax_MPIAIJ,
2637                                        MatScale_MPIAIJ,
2638                                        MatShift_MPIAIJ,
2639                                        MatDiagonalSet_MPIAIJ,
2640                                        MatZeroRowsColumns_MPIAIJ,
2641                                 /*49*/ MatSetRandom_MPIAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2647                                        0,
2648                                        MatSetUnfactored_MPIAIJ,
2649                                        MatPermute_MPIAIJ,
2650                                        0,
2651                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2652                                        MatDestroy_MPIAIJ,
2653                                        MatView_MPIAIJ,
2654                                        0,
2655                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2656                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2657                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2662                                        MatGetRowMinAbs_MPIAIJ,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*75*/ MatFDColoringApply_AIJ,
2668                                        MatSetFromOptions_MPIAIJ,
2669                                        0,
2670                                        0,
2671                                        MatFindZeroDiagonals_MPIAIJ,
2672                                 /*80*/ 0,
2673                                        0,
2674                                        0,
2675                                 /*83*/ MatLoad_MPIAIJ,
2676                                        MatIsSymmetric_MPIAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2682                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2683                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2684                                        MatPtAP_MPIAIJ_MPIAIJ,
2685                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2686                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                 /*99*/ 0,
2692                                        0,
2693                                        0,
2694                                        MatConjugate_MPIAIJ,
2695                                        0,
2696                                 /*104*/MatSetValuesRow_MPIAIJ,
2697                                        MatRealPart_MPIAIJ,
2698                                        MatImaginaryPart_MPIAIJ,
2699                                        0,
2700                                        0,
2701                                 /*109*/0,
2702                                        0,
2703                                        MatGetRowMin_MPIAIJ,
2704                                        0,
2705                                        MatMissingDiagonal_MPIAIJ,
2706                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2707                                        0,
2708                                        MatGetGhosts_MPIAIJ,
2709                                        0,
2710                                        0,
2711                                 /*119*/0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        MatGetMultiProcBlock_MPIAIJ,
2716                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2717                                        MatGetColumnNorms_MPIAIJ,
2718                                        MatInvertBlockDiagonal_MPIAIJ,
2719                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2720                                        MatCreateSubMatricesMPI_MPIAIJ,
2721                                 /*129*/0,
2722                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2723                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2724                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2725                                        0,
2726                                 /*134*/0,
2727                                        0,
2728                                        MatRARt_MPIAIJ_MPIAIJ,
2729                                        0,
2730                                        0,
2731                                 /*139*/MatSetBlockSizes_MPIAIJ,
2732                                        0,
2733                                        0,
2734                                        MatFDColoringSetUp_MPIXAIJ,
2735                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2736                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2737 };
2738 
2739 /* ----------------------------------------------------------------------------------------*/
2740 
2741 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2753 {
2754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2759   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2764 {
2765   Mat_MPIAIJ     *b;
2766   PetscErrorCode ierr;
2767   PetscMPIInt    size;
2768 
2769   PetscFunctionBegin;
2770   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2772   b = (Mat_MPIAIJ*)B->data;
2773 
2774 #if defined(PETSC_USE_CTABLE)
2775   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2776 #else
2777   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2778 #endif
2779   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2780   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2781   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2782 
2783   /* Because B will have been resized, we simply destroy it and create a new one each time */
2784   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2785   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2786   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2787   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2788   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2789   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2790   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2791 
2792   if (!B->preallocated) {
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2795     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2796     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2797     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2798   }
2799 
2800   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2801   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2802   B->preallocated  = PETSC_TRUE;
2803   B->was_assembled = PETSC_FALSE;
2804   B->assembled     = PETSC_FALSE;
2805   PetscFunctionReturn(0);
2806 }
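
/*
   A preallocation sketch, assuming a hypothetical square matrix where at most 3
   nonzeros per row fall in the diagonal block and at most 2 in the off-diagonal
   block, so constant d_nz/o_nz bounds suffice; when exact per-row counts are
   known, pass them as the d_nnz/o_nnz arrays instead. The helper name and sizes
   are illustrative; the final call lands in the routine above.
*/
#include <petscmat.h>

static PetscErrorCode CreatePreallocatedAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt Mglobal,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,Mglobal,Mglobal);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,3,NULL,2,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}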
2807 
2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2809 {
2810   Mat_MPIAIJ     *b;
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817   b = (Mat_MPIAIJ*)B->data;
2818 
2819 #if defined(PETSC_USE_CTABLE)
2820   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2821 #else
2822   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2823 #endif
2824   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2825   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2826   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2827 
2828   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2829   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2830   B->preallocated  = PETSC_TRUE;
2831   B->was_assembled = PETSC_FALSE;
2832   B->assembled     = PETSC_FALSE;
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2837 {
2838   Mat            mat;
2839   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   *newmat = 0;
2844   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2845   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2846   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2847   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2848   a       = (Mat_MPIAIJ*)mat->data;
2849 
2850   mat->factortype   = matin->factortype;
2851   mat->assembled    = PETSC_TRUE;
2852   mat->insertmode   = NOT_SET_VALUES;
2853   mat->preallocated = PETSC_TRUE;
2854 
2855   a->size         = oldmat->size;
2856   a->rank         = oldmat->rank;
2857   a->donotstash   = oldmat->donotstash;
2858   a->roworiented  = oldmat->roworiented;
2859   a->rowindices   = 0;
2860   a->rowvalues    = 0;
2861   a->getrowactive = PETSC_FALSE;
2862 
2863   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2864   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2865 
2866   if (oldmat->colmap) {
2867 #if defined(PETSC_USE_CTABLE)
2868     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2869 #else
2870     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2871     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2872     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2873 #endif
2874   } else a->colmap = 0;
2875   if (oldmat->garray) {
2876     PetscInt len;
2877     len  = oldmat->B->cmap->n;
2878     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2879     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2880     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2881   } else a->garray = 0;
2882 
2883   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2884   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2885   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2886   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2887 
2888   if (oldmat->Mvctx_mpi1) {
2889     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2890     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2891   }
2892 
2893   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2894   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2895   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2896   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2897   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2898   *newmat = mat;
2899   PetscFunctionReturn(0);
2900 }
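
/*
   Usage sketch for the routine above: MAT_COPY_VALUES clones both the nonzero
   structure and the numerical values; MAT_DO_NOT_COPY_VALUES clones only the
   structure; MAT_SHARE_NONZERO_PATTERN shares the pattern between the two
   matrices. The helper name is illustrative.
*/
#include <petscmat.h>

static PetscErrorCode CloneMatrix(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}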
2901 
2902 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2903 {
2904   PetscBool      isbinary, ishdf5;
2905   PetscErrorCode ierr;
2906 
2907   PetscFunctionBegin;
2908   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2909   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2910   /* force binary viewer to load .info file if it has not yet done so */
2911   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2912   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2913   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2914   if (isbinary) {
2915     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2916   } else if (ishdf5) {
2917 #if defined(PETSC_HAVE_HDF5)
2918     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2919 #else
2920     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2921 #endif
2922   } else {
2923     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2924   }
2925   PetscFunctionReturn(0);
2926 }
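
/*
   Loading sketch: read a MATMPIAIJ from a PETSc binary file, which routes through
   MatLoad_MPIAIJ_Binary() below. The file name argument is a placeholder supplied
   by the caller.
*/
#include <petscmat.h>

static PetscErrorCode LoadAIJFromBinary(MPI_Comm comm,const char *file,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,file,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}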
2927 
2928 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2929 {
2930   PetscScalar    *vals,*svals;
2931   MPI_Comm       comm;
2932   PetscErrorCode ierr;
2933   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2934   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2935   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2936   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2937   PetscInt       cend,cstart,n,*rowners;
2938   int            fd;
2939   PetscInt       bs = newMat->rmap->bs;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2945   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2946   if (!rank) {
2947     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2948     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2949     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2950   }
2951 
2952   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2953   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2954   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2955   if (bs < 0) bs = 1;
2956 
2957   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2958   M    = header[1]; N = header[2];
2959 
2960   /* If global sizes are set, check if they are consistent with that given in the file */
2961   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2962   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2963 
2964   /* determine ownership of all (block) rows */
2965   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2966   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2967   else m = newMat->rmap->n; /* Set by user */
2968 
2969   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2970   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2971 
2972   /* First process needs enough room for process with most rows */
2973   if (!rank) {
2974     mmax = rowners[1];
2975     for (i=2; i<=size; i++) {
2976       mmax = PetscMax(mmax, rowners[i]);
2977     }
2978   } else mmax = -1;             /* unused, but compilers complain */
2979 
2980   rowners[0] = 0;
2981   for (i=2; i<=size; i++) {
2982     rowners[i] += rowners[i-1];
2983   }
2984   rstart = rowners[rank];
2985   rend   = rowners[rank+1];
2986 
2987   /* distribute row lengths to all processors */
2988   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2989   if (!rank) {
2990     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
2991     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2992     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2993     for (j=0; j<m; j++) {
2994       procsnz[0] += ourlens[j];
2995     }
2996     for (i=1; i<size; i++) {
2997       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
2998       /* calculate the number of nonzeros on each processor */
2999       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3000         procsnz[i] += rowlengths[j];
3001       }
3002       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3003     }
3004     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3005   } else {
3006     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3007   }
3008 
3009   if (!rank) {
3010     /* determine max buffer needed and allocate it */
3011     maxnz = 0;
3012     for (i=0; i<size; i++) {
3013       maxnz = PetscMax(maxnz,procsnz[i]);
3014     }
3015     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3016 
3017     /* read in my part of the matrix column indices  */
3018     nz   = procsnz[0];
3019     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3020     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3021 
3022     /* read in everyone else's parts and ship them off */
3023     for (i=1; i<size; i++) {
3024       nz   = procsnz[i];
3025       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3026       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3027     }
3028     ierr = PetscFree(cols);CHKERRQ(ierr);
3029   } else {
3030     /* determine buffer space needed for message */
3031     nz = 0;
3032     for (i=0; i<m; i++) {
3033       nz += ourlens[i];
3034     }
3035     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3036 
3037     /* receive message of column indices */
3038     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3039   }
3040 
3041   /* determine column ownership if matrix is not square */
3042   if (N != M) {
3043     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3044     else n = newMat->cmap->n;
3045     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3046     cstart = cend - n;
3047   } else {
3048     cstart = rstart;
3049     cend   = rend;
3050     n      = cend - cstart;
3051   }
3052 
3053   /* loop over local rows, determining number of off-diagonal entries */
3054   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3055   jj   = 0;
3056   for (i=0; i<m; i++) {
3057     for (j=0; j<ourlens[i]; j++) {
3058       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3059       jj++;
3060     }
3061   }
3062 
3063   for (i=0; i<m; i++) {
3064     ourlens[i] -= offlens[i];
3065   }
3066   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3067 
3068   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3069 
3070   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3071 
3072   for (i=0; i<m; i++) {
3073     ourlens[i] += offlens[i];
3074   }
3075 
3076   if (!rank) {
3077     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3078 
3079     /* read in my part of the matrix numerical values  */
3080     nz   = procsnz[0];
3081     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3082 
3083     /* insert into matrix */
3084     jj      = rstart;
3085     smycols = mycols;
3086     svals   = vals;
3087     for (i=0; i<m; i++) {
3088       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3089       smycols += ourlens[i];
3090       svals   += ourlens[i];
3091       jj++;
3092     }
3093 
3094     /* read in other processors and ship out */
3095     for (i=1; i<size; i++) {
3096       nz   = procsnz[i];
3097       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3098       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3099     }
3100     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3101   } else {
3102     /* receive numeric values */
3103     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3104 
3105     /* receive message of values */
3106     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3107 
3108     /* insert into matrix */
3109     jj      = rstart;
3110     smycols = mycols;
3111     svals   = vals;
3112     for (i=0; i<m; i++) {
3113       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3114       smycols += ourlens[i];
3115       svals   += ourlens[i];
3116       jj++;
3117     }
3118   }
3119   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3120   ierr = PetscFree(vals);CHKERRQ(ierr);
3121   ierr = PetscFree(mycols);CHKERRQ(ierr);
3122   ierr = PetscFree(rowners);CHKERRQ(ierr);
3123   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125   PetscFunctionReturn(0);
3126 }
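
/*
   For orientation, the binary layout consumed above is, as read by this routine
   (all integers PetscInt):

     header[0]                    MAT_FILE_CLASSID
     header[1]                    M, global number of rows
     header[2]                    N, global number of columns
     header[3]                    nonzero count (a negative value flags a special
                                  format; the code above only checks its sign)
     M integers                   row lengths
     sum(row lengths) integers    global column indices, row by row
     sum(row lengths) scalars     numerical values, row by row
*/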
3127 
3128 /* Not scalable because of ISAllGather() unless getting all columns. */
3129 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3130 {
3131   PetscErrorCode ierr;
3132   IS             iscol_local;
3133   PetscBool      isstride;
3134   PetscMPIInt    lisstride=0,gisstride;
3135 
3136   PetscFunctionBegin;
3137   /* check if we are grabbing all columns */
3138   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3139 
3140   if (isstride) {
3141     PetscInt  start,len,mstart,mlen;
3142     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3143     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3144     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3145     if (mstart == start && mlen-mstart == len) lisstride = 1;
3146   }
3147 
3148   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3149   if (gisstride) {
3150     PetscInt N;
3151     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3152     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3153     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3154     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3155   } else {
3156     PetscInt cbs;
3157     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3158     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3159     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3160   }
3161 
3162   *isseq = iscol_local;
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 /*
3167  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and building an iscol_local of global size
3168  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3169 
3170  Input Parameters:
3171    mat - matrix
3172    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3173            i.e., mat->rstart <= isrow[i] < mat->rend
3174    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3175            i.e., mat->cstart <= iscol[i] < mat->cend
3176  Output Parameters:
3177    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3178    iscol_o - sequential column index set for retrieving mat->B
3179    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3180  */
3181 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3182 {
3183   PetscErrorCode ierr;
3184   Vec            x,cmap;
3185   const PetscInt *is_idx;
3186   PetscScalar    *xarray,*cmaparray;
3187   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3188   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3189   Mat            B=a->B;
3190   Vec            lvec=a->lvec,lcmap;
3191   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3192   MPI_Comm       comm;
3193   VecScatter     Mvctx=a->Mvctx;
3194 
3195   PetscFunctionBegin;
3196   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3198 
3199   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3200   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3201   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3202   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3203   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3204 
3205   /* Get start indices */
3206   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3207   isstart -= ncols;
3208   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3209 
3210   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3211   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3212   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3213   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3214   for (i=0; i<ncols; i++) {
3215     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3216     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3217     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3218   }
3219   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3220   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3221   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3222 
3223   /* Get iscol_d */
3224   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3225   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3226   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3227 
3228   /* Get isrow_d */
3229   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3230   rstart = mat->rmap->rstart;
3231   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3232   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3233   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3234   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3235 
3236   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3237   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3238   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3239 
3240   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3241   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3242   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3243 
3244   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3245 
3246   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3247   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3248 
3249   /* (3) create sequential iscol_o (a subset of iscol) and garray */
3250   /* off-process column indices */
3251   count = 0;
3252   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3253   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3254 
3255   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3256   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3257   for (i=0; i<Bn; i++) {
3258     if (PetscRealPart(xarray[i]) > -1.0) {
3259       idx[count]   = i;                                      /* local column index in off-diagonal part B */
3260       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3261       count++;
3262     }
3263   }
3264   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3265   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3266 
3267   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3268   /* cannot ensure iscol_o has same blocksize as iscol! */
3269 
3270   ierr = PetscFree(idx);CHKERRQ(ierr);
3271   *garray = cmap1;
3272 
3273   ierr = VecDestroy(&x);CHKERRQ(ierr);
3274   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3275   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3280 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3281 {
3282   PetscErrorCode ierr;
3283   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3284   Mat            M = NULL;
3285   MPI_Comm       comm;
3286   IS             iscol_d,isrow_d,iscol_o;
3287   Mat            Asub = NULL,Bsub = NULL;
3288   PetscInt       n;
3289 
3290   PetscFunctionBegin;
3291   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3292 
3293   if (call == MAT_REUSE_MATRIX) {
3294     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3295     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3296     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3297 
3298     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3299     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3300 
3301     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3302     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3303 
3304     /* Update diagonal and off-diagonal portions of submat */
3305     asub = (Mat_MPIAIJ*)(*submat)->data;
3306     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3307     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3308     if (n) {
3309       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3310     }
3311     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3313 
3314   } else { /* call == MAT_INITIAL_MATRIX */
3315     const PetscInt *garray;
3316     PetscInt        BsubN;
3317 
3318     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3319     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3320 
3321     /* Create local submatrices Asub and Bsub */
3322     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3323     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3324 
3325     /* Create submatrix M */
3326     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3327 
3328     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3329     asub = (Mat_MPIAIJ*)M->data;
3330 
3331     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3332     n = asub->B->cmap->N;
3333     if (BsubN > n) {
3334       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3335       const PetscInt *idx;
3336       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3337       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3338 
3339       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3340       j = 0;
3341       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3342       for (i=0; i<n; i++) {
3343         if (j >= BsubN) break;
3344         while (subgarray[i] > garray[j]) j++;
3345 
3346         if (subgarray[i] == garray[j]) {
3347           idx_new[i] = idx[j++];
3348         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3349       }
3350       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3351 
3352       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3353       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3354 
3355     } else if (BsubN < n) {
3356       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3357     }
3358 
3359     ierr = PetscFree(garray);CHKERRQ(ierr);
3360     *submat = M;
3361 
3362     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3363     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3364     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3365 
3366     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3367     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3368 
3369     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3370     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3371   }
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3376 {
3377   PetscErrorCode ierr;
3378   IS             iscol_local=NULL,isrow_d;
3379   PetscInt       csize;
3380   PetscInt       n,i,j,start,end;
3381   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3382   MPI_Comm       comm;
3383 
3384   PetscFunctionBegin;
3385   /* If isrow has same processor distribution as mat,
3386      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3387   if (call == MAT_REUSE_MATRIX) {
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3389     if (isrow_d) {
3390       sameRowDist  = PETSC_TRUE;
3391       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3392     } else {
3393       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3394       if (iscol_local) {
3395         sameRowDist  = PETSC_TRUE;
3396         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3397       }
3398     }
3399   } else {
3400     /* Check if isrow has same processor distribution as mat */
3401     sameDist[0] = PETSC_FALSE;
3402     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3403     if (!n) {
3404       sameDist[0] = PETSC_TRUE;
3405     } else {
3406       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3407       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3408       if (i >= start && j < end) {
3409         sameDist[0] = PETSC_TRUE;
3410       }
3411     }
3412 
3413     /* Check if iscol has same processor distribution as mat */
3414     sameDist[1] = PETSC_FALSE;
3415     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3416     if (!n) {
3417       sameDist[1] = PETSC_TRUE;
3418     } else {
3419       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3420       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3421       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3422     }
3423 
3424     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3425     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3426     sameRowDist = tsameDist[0];
3427   }
3428 
3429   if (sameRowDist) {
3430     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3431       /* isrow and iscol have same processor distribution as mat */
3432       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3433       PetscFunctionReturn(0);
3434     } else { /* sameRowDist */
3435       /* isrow has same processor distribution as mat */
3436       if (call == MAT_INITIAL_MATRIX) {
3437         PetscBool sorted;
3438         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3439         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3440         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3441         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3442 
3443         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3444         if (sorted) {
3445           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3446           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3447           PetscFunctionReturn(0);
3448         }
3449       } else { /* call == MAT_REUSE_MATRIX */
3450         IS    iscol_sub;
3451         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3452         if (iscol_sub) {
3453           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3454           PetscFunctionReturn(0);
3455         }
3456       }
3457     }
3458   }
3459 
3460   /* General case: iscol -> iscol_local which has global size of iscol */
3461   if (call == MAT_REUSE_MATRIX) {
3462     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3463     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3464   } else {
3465     if (!iscol_local) {
3466       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3467     }
3468   }
3469 
3470   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3471   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3472 
3473   if (call == MAT_INITIAL_MATRIX) {
3474     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3475     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3476   }
3477   PetscFunctionReturn(0);
3478 }
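
/*
   Usage sketch for the dispatcher above (helper name is illustrative): extract the
   locally owned rows and all columns, i.e. a full parallel copy of mat. Because
   both index sets match mat's own row and column distribution, this should take
   the MatCreateSubMatrix_MPIAIJ_SameRowColDist() fast path.
*/
#include <petscmat.h>

static PetscErrorCode ExtractOwnedBlock(Mat mat,Mat *sub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             isrow,iscol;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(comm,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}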
3479 
3480 /*@C
3481      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3482          and "off-diagonal" part of the matrix in CSR format.
3483 
3484    Collective on MPI_Comm
3485 
3486    Input Parameters:
3487 +  comm - MPI communicator
3488 .  A - "diagonal" portion of matrix
3489 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3490 -  garray - global index of B columns
3491 
3492    Output Parameter:
3493 .   mat - the matrix, with input A as its local diagonal matrix
3494    Level: advanced
3495 
3496    Notes:
3497        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3498        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3499 
3500 .seealso: MatCreateMPIAIJWithSplitArrays()
3501 @*/
3502 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3503 {
3504   PetscErrorCode ierr;
3505   Mat_MPIAIJ     *maij;
3506   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3507   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3508   PetscScalar    *oa=b->a;
3509   Mat            Bnew;
3510   PetscInt       m,n,N;
3511 
3512   PetscFunctionBegin;
3513   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3514   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3515   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3516   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3517   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3518   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3519 
3520   /* Get global columns of mat */
3521   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3522 
3523   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3524   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3525   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3526   maij = (Mat_MPIAIJ*)(*mat)->data;
3527 
3528   (*mat)->preallocated = PETSC_TRUE;
3529 
3530   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3531   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3532 
3533   /* Set A as diagonal portion of *mat */
3534   maij->A = A;
3535 
3536   nz = oi[m];
3537   for (i=0; i<nz; i++) {
3538     col   = oj[i];
3539     oj[i] = garray[col];
3540   }
3541 
3542   /* Set Bnew as off-diagonal portion of *mat */
3543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3544   bnew        = (Mat_SeqAIJ*)Bnew->data;
3545   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3546   maij->B     = Bnew;
3547 
3548   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3549 
3550   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3551   b->free_a       = PETSC_FALSE;
3552   b->free_ij      = PETSC_FALSE;
3553   ierr = MatDestroy(&B);CHKERRQ(ierr);
3554 
3555   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3556   bnew->free_a       = PETSC_TRUE;
3557   bnew->free_ij      = PETSC_TRUE;
3558 
3559   /* condense columns of maij->B */
3560   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3561   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3564   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3565   PetscFunctionReturn(0);
3566 }
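
/*
   Worked example of the garray argument above (values illustrative): with two
   processes and a 4x4 matrix split two rows apiece, suppose rank 0's off-diagonal
   block B has entries only in global columns 2 and 3. B is then stored compactly
   with two local columns and garray = {2,3}; the loop above rewrites each local
   column index col of B as garray[col] before Bnew is assembled into *mat.
*/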
3567 
3568 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3569 
3570 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3571 {
3572   PetscErrorCode ierr;
3573   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3574   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3575   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3576   Mat            M,Msub,B=a->B;
3577   MatScalar      *aa;
3578   Mat_SeqAIJ     *aij;
3579   PetscInt       *garray = a->garray,*colsub,Ncols;
3580   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3581   IS             iscol_sub,iscmap;
3582   const PetscInt *is_idx,*cmap;
3583   PetscBool      allcolumns=PETSC_FALSE;
3584   MPI_Comm       comm;
3585 
3586   PetscFunctionBegin;
3587   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3588 
3589   if (call == MAT_REUSE_MATRIX) {
3590     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3591     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3592     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3593 
3594     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3595     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3596 
3597     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3598     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3599 
3600     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3601 
3602   } else { /* call == MAT_INITIAL_MATRIX */
3603     PetscBool flg;
3604 
3605     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3606     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3607 
3608     /* (1) iscol -> nonscalable iscol_local */
3609     /* Check for special case: each processor gets entire matrix columns */
3610     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3611     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3612     if (allcolumns) {
3613       iscol_sub = iscol_local;
3614       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3615       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3616 
3617     } else {
3618       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3619       PetscInt *idx,*cmap1,k;
3620       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3621       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3622       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3623       count = 0;
3624       k     = 0;
3625       for (i=0; i<Ncols; i++) {
3626         j = is_idx[i];
3627         if (j >= cstart && j < cend) {
3628           /* diagonal part of mat */
3629           idx[count]     = j;
3630           cmap1[count++] = i; /* column index in submat */
3631         } else if (Bn) {
3632           /* off-diagonal part of mat */
3633           if (j == garray[k]) {
3634             idx[count]     = j;
3635             cmap1[count++] = i;  /* column index in submat */
3636           } else if (j > garray[k]) {
3637             while (j > garray[k] && k < Bn-1) k++;
3638             if (j == garray[k]) {
3639               idx[count]     = j;
3640               cmap1[count++] = i; /* column index in submat */
3641             }
3642           }
3643         }
3644       }
3645       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3646 
3647       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3648       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3649       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3650 
3651       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3652     }
3653 
3654     /* (3) Create sequential Msub */
3655     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3656   }
3657 
3658   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3659   aij  = (Mat_SeqAIJ*)(Msub)->data;
3660   ii   = aij->i;
3661   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3662 
3663   /*
3664       m - number of local rows
3665       Ncols - number of columns (same on all processors)
3666       rstart - first row in new global matrix generated
3667   */
3668   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3669 
3670   if (call == MAT_INITIAL_MATRIX) {
3671     /* (4) Create parallel newmat */
3672     PetscMPIInt    rank,size;
3673     PetscInt       csize;
3674 
3675     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3676     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677 
3678     /*
3679         Determine the number of non-zeros in the diagonal and off-diagonal
3680         portions of the matrix in order to do correct preallocation
3681     */
3682 
3683     /* first get start and end of "diagonal" columns */
3684     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3685     if (csize == PETSC_DECIDE) {
3686       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3687       if (mglobal == Ncols) { /* square matrix */
3688         nlocal = m;
3689       } else {
3690         nlocal = Ncols/size + ((Ncols % size) > rank);
3691       }
3692     } else {
3693       nlocal = csize;
3694     }
3695     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3696     rstart = rend - nlocal;
3697     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3698 
3699     /* next, compute all the lengths */
3700     jj    = aij->j;
3701     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3702     olens = dlens + m;
3703     for (i=0; i<m; i++) {
3704       jend = ii[i+1] - ii[i];
3705       olen = 0;
3706       dlen = 0;
3707       for (j=0; j<jend; j++) {
3708         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3709         else dlen++;
3710         jj++;
3711       }
3712       olens[i] = olen;
3713       dlens[i] = dlen;
3714     }
3715 
3716     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3717     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3718 
3719     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3720     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3721     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3722     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3723     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3724     ierr = PetscFree(dlens);CHKERRQ(ierr);
3725 
3726   } else { /* call == MAT_REUSE_MATRIX */
3727     M    = *newmat;
3728     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3729     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3731     /*
3732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733        rather than the slower MatSetValues().
3734     */
3735     M->was_assembled = PETSC_TRUE;
3736     M->assembled     = PETSC_FALSE;
3737   }
3738 
3739   /* (5) Set values of Msub to *newmat */
3740   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3741   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3742 
3743   jj   = aij->j;
3744   aa   = aij->a;
3745   for (i=0; i<m; i++) {
3746     row = rstart + i;
3747     nz  = ii[i+1] - ii[i];
3748     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3749     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3750     jj += nz; aa += nz;
3751   }
3752   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3753 
3754   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756 
3757   ierr = PetscFree(colsub);CHKERRQ(ierr);
3758 
3759   /* save Msub, iscol_sub and iscmap used in processor for next request */
3760   if (call == MAT_INITIAL_MATRIX) {
3761     *newmat = M;
3762     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3763     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3764 
3765     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3766     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3767 
3768     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3769     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3770 
3771     if (iscol_local) {
3772       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3773       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3774     }
3775   }
3776   PetscFunctionReturn(0);
3777 }
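
/*
   Illustrative sketch (added commentary, not part of the PETSc source; the helper name is
   hypothetical): on a later MAT_REUSE_MATRIX call, the objects composed above are recovered
   with PetscObjectQuery(), as the nonscalable variant below also does.
*/
static PetscErrorCode QueryCachedSubMatrix_Example(Mat newmat,Mat *Msub)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)newmat,"SubMatrix",(PetscObject*)Msub);CHKERRQ(ierr);
  if (!*Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  PetscFunctionReturn(0);
}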
3778 
3779 /*
3780     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3781   matrix, and then the end result built by concatenating the local matrices.
3782   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3783 
3784   Note: This requires a sequential iscol with all indices.
3785 */
3786 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3787 {
3788   PetscErrorCode ierr;
3789   PetscMPIInt    rank,size;
3790   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3791   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3792   Mat            M,Mreuse;
3793   MatScalar      *aa,*vwork;
3794   MPI_Comm       comm;
3795   Mat_SeqAIJ     *aij;
3796   PetscBool      colflag,allcolumns=PETSC_FALSE;
3797 
3798   PetscFunctionBegin;
3799   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3800   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3802 
3803   /* Check for special case: each processor gets entire matrix columns */
3804   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3805   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3806   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3807 
3808   if (call ==  MAT_REUSE_MATRIX) {
3809     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3810     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3811     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3812   } else {
3813     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3814   }
3815 
3816   /*
3817       m - number of local rows
3818       n - number of columns (same on all processors)
3819       rstart - first row in new global matrix generated
3820   */
3821   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3822   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3823   if (call == MAT_INITIAL_MATRIX) {
3824     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3825     ii  = aij->i;
3826     jj  = aij->j;
3827 
3828     /*
3829         Determine the number of non-zeros in the diagonal and off-diagonal
3830         portions of the matrix in order to do correct preallocation
3831     */
3832 
3833     /* first get start and end of "diagonal" columns */
3834     if (csize == PETSC_DECIDE) {
3835       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3836       if (mglobal == n) { /* square matrix */
3837         nlocal = m;
3838       } else {
3839         nlocal = n/size + ((n % size) > rank);
3840       }
3841     } else {
3842       nlocal = csize;
3843     }
3844     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3845     rstart = rend - nlocal;
3846     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3847 
3848     /* next, compute all the lengths */
3849     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3850     olens = dlens + m;
3851     for (i=0; i<m; i++) {
3852       jend = ii[i+1] - ii[i];
3853       olen = 0;
3854       dlen = 0;
3855       for (j=0; j<jend; j++) {
3856         if (*jj < rstart || *jj >= rend) olen++;
3857         else dlen++;
3858         jj++;
3859       }
3860       olens[i] = olen;
3861       dlens[i] = dlen;
3862     }
3863     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3864     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3865     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3866     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3867     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3868     ierr = PetscFree(dlens);CHKERRQ(ierr);
3869   } else {
3870     PetscInt ml,nl;
3871 
3872     M    = *newmat;
3873     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3874     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3875     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3876     /*
3877          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3878        rather than the slower MatSetValues().
3879     */
3880     M->was_assembled = PETSC_TRUE;
3881     M->assembled     = PETSC_FALSE;
3882   }
3883   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3884   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3885   ii   = aij->i;
3886   jj   = aij->j;
3887   aa   = aij->a;
3888   for (i=0; i<m; i++) {
3889     row   = rstart + i;
3890     nz    = ii[i+1] - ii[i];
3891     cwork = jj;     jj += nz;
3892     vwork = aa;     aa += nz;
3893     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3894   }
3895 
3896   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3897   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3898   *newmat = M;
3899 
3900   /* save submatrix used in processor for next request */
3901   if (call ==  MAT_INITIAL_MATRIX) {
3902     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3903     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3904   }
3905   PetscFunctionReturn(0);
3906 }
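
/*
   Illustrative usage sketch (hypothetical helper, not part of the PETSc source): the routines
   above back the public MatCreateSubMatrix() interface for MPIAIJ matrices. A first call with
   MAT_INITIAL_MATRIX builds the submatrix and caches the work objects on it; a second call
   with MAT_REUSE_MATRIX refills the same submatrix after the parent's values change.
*/
static PetscErrorCode ExtractSubMatrixTwice_Example(Mat mat,IS isrow,IS iscol,Mat *sub)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  /* ... change the values of mat, keeping the same nonzero pattern ... */
  ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}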
3907 
3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3909 {
3910   PetscInt       m,cstart, cend,j,nnz,i,d;
3911   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3912   const PetscInt *JJ;
3913   PetscScalar    *values;
3914   PetscErrorCode ierr;
3915   PetscBool      nooffprocentries;
3916 
3917   PetscFunctionBegin;
3918   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3919 
3920   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3921   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3922   m      = B->rmap->n;
3923   cstart = B->cmap->rstart;
3924   cend   = B->cmap->rend;
3925   rstart = B->rmap->rstart;
3926 
3927   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3928 
3929 #if defined(PETSC_USE_DEBUG)
3930   for (i=0; i<m && Ii; i++) {
3931     nnz = Ii[i+1]- Ii[i];
3932     JJ  = J + Ii[i];
3933     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3934     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3935     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3936   }
3937 #endif
3938 
3939   for (i=0; i<m && Ii; i++) {
3940     nnz     = Ii[i+1]- Ii[i];
3941     JJ      = J + Ii[i];
3942     nnz_max = PetscMax(nnz_max,nnz);
3943     d       = 0;
3944     for (j=0; j<nnz; j++) {
3945       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3946     }
3947     d_nnz[i] = d;
3948     o_nnz[i] = nnz - d;
3949   }
3950   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3951   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3952 
3953   if (v) values = (PetscScalar*)v;
3954   else {
3955     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3956   }
3957 
3958   for (i=0; i<m && Ii; i++) {
3959     ii   = i + rstart;
3960     nnz  = Ii[i+1]- Ii[i];
3961     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3962   }
3963   nooffprocentries    = B->nooffprocentries;
3964   B->nooffprocentries = PETSC_TRUE;
3965   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967   B->nooffprocentries = nooffprocentries;
3968 
3969   if (!v) {
3970     ierr = PetscFree(values);CHKERRQ(ierr);
3971   }
3972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 /*@
3977    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3978    (the default parallel PETSc format).
3979 
3980    Collective on MPI_Comm
3981 
3982    Input Parameters:
3983 +  B - the matrix
3984 .  i - the indices into j for the start of each local row (starts with zero)
3985 .  j - the column indices for each local row (starts with zero)
3986 -  v - optional values in the matrix
3987 
3988    Level: developer
3989 
3990    Notes:
3991        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3992      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3993      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3994 
3995        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3996 
3997        The format used for the sparse matrix input is equivalent to a
3998     row-major ordering, i.e., for the following matrix the expected input data is
3999     as shown:
4000 
4001 $        1 0 0
4002 $        2 0 3     P0
4003 $       -------
4004 $        4 5 6     P1
4005 $
4006 $     Process0 [P0]: rows_owned=[0,1]
4007 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4008 $        j =  {0,0,2}  [size = 3]
4009 $        v =  {1,2,3}  [size = 3]
4010 $
4011 $     Process1 [P1]: rows_owned=[2]
4012 $        i =  {0,3}    [size = nrow+1  = 1+1]
4013 $        j =  {0,1,2}  [size = 3]
4014 $        v =  {4,5,6}  [size = 3]
4015 
4016 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4017           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4018 @*/
4019 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4020 {
4021   PetscErrorCode ierr;
4022 
4023   PetscFunctionBegin;
4024   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4025   PetscFunctionReturn(0);
4026 }
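
/*
   Illustrative usage sketch (hypothetical helper, not part of the PETSc source): building the
   3x3 example from the documentation above on exactly two MPI processes with
   MatMPIAIJSetPreallocationCSR(). The arrays are copied, so stack data is fine here.
*/
static PetscErrorCode BuildExampleCSR_Example(MPI_Comm comm,Mat *B)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};   /* rank 0 owns rows 0 and 1 */
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};     /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(*B,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(*B,i1,j1,v1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}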
4027 
4028 /*@C
4029    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4030    (the default parallel PETSc format).  For good matrix assembly performance
4031    the user should preallocate the matrix storage by setting the parameters
4032    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4033    performance can be increased by more than a factor of 50.
4034 
4035    Collective on MPI_Comm
4036 
4037    Input Parameters:
4038 +  B - the matrix
4039 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4040            (same value is used for all local rows)
4041 .  d_nnz - array containing the number of nonzeros in the various rows of the
4042            DIAGONAL portion of the local submatrix (possibly different for each row)
4043            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4044            The size of this array is equal to the number of local rows, i.e., 'm'.
4045            For matrices that will be factored, you must leave room for (and set)
4046            the diagonal entry even if it is zero.
4047 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4048            submatrix (same value is used for all local rows).
4049 -  o_nnz - array containing the number of nonzeros in the various rows of the
4050            OFF-DIAGONAL portion of the local submatrix (possibly different for
4051            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4052            structure. The size of this array is equal to the number
4053            of local rows, i.e., 'm'.
4054 
4055    If the *_nnz parameter is given then the *_nz parameter is ignored
4056 
4057    The AIJ format (also called the Yale sparse matrix format or
4058    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4059    storage.  The stored row and column indices begin with zero.
4060    See Users-Manual: ch_mat for details.
4061 
4062    The parallel matrix is partitioned such that the first m0 rows belong to
4063    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4064    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4065 
4066    The DIAGONAL portion of the local submatrix of a processor can be defined
4067    as the submatrix obtained by extracting the part corresponding to
4068    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4069    first row that belongs to the processor, r2 is the last row belonging to
4070    this processor, and c1-c2 is the range of indices of the local part of a
4071    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4072    common case of a square matrix, the row and column ranges are the same and
4073    the DIAGONAL part is also square. The remaining portion of the local
4074    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4075 
4076    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4077 
4078    You can call MatGetInfo() to get information on how effective the preallocation was;
4079    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4080    You can also run with the option -info and look for messages with the string
4081    malloc in them to see if additional memory allocation was needed.
4082 
4083    Example usage:
4084 
4085    Consider the following 8x8 matrix with 34 non-zero values, that is
4086    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4087    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4088    as follows:
4089 
4090 .vb
4091             1  2  0  |  0  3  0  |  0  4
4092     Proc0   0  5  6  |  7  0  0  |  8  0
4093             9  0 10  | 11  0  0  | 12  0
4094     -------------------------------------
4095            13  0 14  | 15 16 17  |  0  0
4096     Proc1   0 18  0  | 19 20 21  |  0  0
4097             0  0  0  | 22 23  0  | 24  0
4098     -------------------------------------
4099     Proc2  25 26 27  |  0  0 28  | 29  0
4100            30  0  0  | 31 32 33  |  0 34
4101 .ve
4102 
4103    This can be represented as a collection of submatrices as:
4104 
4105 .vb
4106       A B C
4107       D E F
4108       G H I
4109 .ve
4110 
4111    Where the submatrices A,B,C are owned by proc0, D,E,F are
4112    owned by proc1, G,H,I are owned by proc2.
4113 
4114    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4115    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4116    The 'M','N' parameters are 8,8, and have the same values on all procs.
4117 
4118    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4119    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4120    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4121    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4122    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4123    matrix and [DF] as another SeqAIJ matrix.
4124 
4125    When d_nz, o_nz parameters are specified, d_nz storage elements are
4126    allocated for every row of the local diagonal submatrix, and o_nz
4127    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4128    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4129    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4130    In this case, the values of d_nz,o_nz are:
4131 .vb
4132      proc0 : d_nz = 2, o_nz = 2
4133      proc1 : d_nz = 3, o_nz = 2
4134      proc2 : d_nz = 1, o_nz = 4
4135 .ve
4136    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4137    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4138    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4139    34 values.
4140 
4141    When d_nnz, o_nnz parameters are specified, the storage is specified
4142    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4143    In the above case the values for d_nnz,o_nnz are:
4144 .vb
4145      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4146      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4147      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4148 .ve
4149    Here the space allocated is the sum of all the above values, i.e., 34, and
4150    hence the preallocation is perfect.
4151 
4152    Level: intermediate
4153 
4154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4155           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4156 @*/
4157 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4158 {
4159   PetscErrorCode ierr;
4160 
4161   PetscFunctionBegin;
4162   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4163   PetscValidType(B,1);
4164   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4165   PetscFunctionReturn(0);
4166 }
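
/*
   Illustrative sketch (hypothetical helper, not part of the PETSc source): the per-row
   preallocation rank 0 would use for the 8x8 example above, where each of its three local
   rows has 2 nonzeros in the DIAGONAL block and 2 in the OFF-DIAGONAL block.
*/
static PetscErrorCode PreallocRank0_Example(Mat B)
{
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}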
4167 
4168 /*@
4169      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
4170          local rows in standard CSR format.
4171 
4172    Collective on MPI_Comm
4173 
4174    Input Parameters:
4175 +  comm - MPI communicator
4176 .  m - number of local rows (Cannot be PETSC_DECIDE)
4177 .  n - This value should be the same as the local size used in creating the
4178        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4179        calculated if N is given). For square matrices n is almost always m.
4180 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4181 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4182 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4183 .   j - column indices
4184 -   a - matrix values
4185 
4186    Output Parameter:
4187 .   mat - the matrix
4188 
4189    Level: intermediate
4190 
4191    Notes:
4192        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4193      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4194      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4195 
4196        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4197 
4198        The format used for the sparse matrix input is equivalent to a
4199     row-major ordering, i.e., for the following matrix the expected input data is
4200     as shown:
4201 
4202 $        1 0 0
4203 $        2 0 3     P0
4204 $       -------
4205 $        4 5 6     P1
4206 $
4207 $     Process0 [P0]: rows_owned=[0,1]
4208 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4209 $        j =  {0,0,2}  [size = 3]
4210 $        v =  {1,2,3}  [size = 3]
4211 $
4212 $     Process1 [P1]: rows_owned=[2]
4213 $        i =  {0,3}    [size = nrow+1  = 1+1]
4214 $        j =  {0,1,2}  [size = 3]
4215 $        v =  {4,5,6}  [size = 3]
4216 
4217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4218           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4219 @*/
4220 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4221 {
4222   PetscErrorCode ierr;
4223 
4224   PetscFunctionBegin;
4225   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4226   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4227   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4228   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4229   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4230   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4231   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4232   PetscFunctionReturn(0);
4233 }
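
/*
   Illustrative sketch (hypothetical helper, not part of the PETSc source): the rank-0 side of
   the 3x3 documentation example in a single call; rank 1 would pass its own (i,j,v) triplet
   analogously. Run with exactly two MPI processes.
*/
static PetscErrorCode CreateExampleWithArrays_Example(Mat *A)
{
  PetscErrorCode    ierr;
  const PetscInt    i[] = {0,1,3},j[] = {0,0,2};
  const PetscScalar a[] = {1,2,3};

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}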
4234 
4235 /*@C
4236    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4237    (the default parallel PETSc format).  For good matrix assembly performance
4238    the user should preallocate the matrix storage by setting the parameters
4239    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4240    performance can be increased by more than a factor of 50.
4241 
4242    Collective on MPI_Comm
4243 
4244    Input Parameters:
4245 +  comm - MPI communicator
4246 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4247            This value should be the same as the local size used in creating the
4248            y vector for the matrix-vector product y = Ax.
4249 .  n - This value should be the same as the local size used in creating the
4250        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4251        calculated if N is given). For square matrices n is almost always m.
4252 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4253 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4254 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4255            (same value is used for all local rows)
4256 .  d_nnz - array containing the number of nonzeros in the various rows of the
4257            DIAGONAL portion of the local submatrix (possibly different for each row)
4258            or NULL, if d_nz is used to specify the nonzero structure.
4259            The size of this array is equal to the number of local rows, i.e 'm'.
4260 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4261            submatrix (same value is used for all local rows).
4262 -  o_nnz - array containing the number of nonzeros in the various rows of the
4263            OFF-DIAGONAL portion of the local submatrix (possibly different for
4264            each row) or NULL, if o_nz is used to specify the nonzero
4265            structure. The size of this array is equal to the number
4266            of local rows, i.e 'm'.
4267 
4268    Output Parameter:
4269 .  A - the matrix
4270 
4271    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4272    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4273    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4274 
4275    Notes:
4276    If the *_nnz parameter is given then the *_nz parameter is ignored
4277 
4278    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4279    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4280    storage requirements for this matrix.
4281 
4282    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4283    processor then it must be used on all processors that share the object for
4284    that argument.
4285 
4286    The user MUST specify either the local or global matrix dimensions
4287    (possibly both).
4288 
4289    The parallel matrix is partitioned across processors such that the
4290    first m0 rows belong to process 0, the next m1 rows belong to
4291    process 1, the next m2 rows belong to process 2, etc., where
4292    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4293    values corresponding to an [m x N] submatrix.
4294 
4295    The columns are logically partitioned with the n0 columns belonging
4296    to the 0th partition, the next n1 columns belonging to the next
4297    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4298 
4299    The DIAGONAL portion of the local submatrix on any given processor
4300    is the submatrix corresponding to the rows and columns m,n
4301    corresponding to the given processor, i.e., the diagonal matrix on
4302    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4303    etc. The remaining portion of the local submatrix [m x (N-n)]
4304    constitutes the OFF-DIAGONAL portion. The example below better
4305    illustrates this concept.
4306 
4307    For a square global matrix we define each processor's diagonal portion
4308    to be its local rows and the corresponding columns (a square submatrix);
4309    each processor's off-diagonal portion encompasses the remainder of the
4310    local matrix (a rectangular submatrix).
4311 
4312    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4313 
4314    When calling this routine with a single process communicator, a matrix of
4315    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4316    type of communicator, use the construction mechanism
4317 .vb
4318      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4319 .ve
4325 
4326    By default, this format uses inodes (identical nodes) when possible.
4327    We search for consecutive rows with the same nonzero structure, thereby
4328    reusing matrix information to achieve increased efficiency.
4329 
4330    Options Database Keys:
4331 +  -mat_no_inode  - Do not use inodes
4332 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4333 
4336    Example usage:
4337 
4338    Consider the following 8x8 matrix with 34 non-zero values, that is
4339    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4340    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4341    as follows
4342 
4343 .vb
4344             1  2  0  |  0  3  0  |  0  4
4345     Proc0   0  5  6  |  7  0  0  |  8  0
4346             9  0 10  | 11  0  0  | 12  0
4347     -------------------------------------
4348            13  0 14  | 15 16 17  |  0  0
4349     Proc1   0 18  0  | 19 20 21  |  0  0
4350             0  0  0  | 22 23  0  | 24  0
4351     -------------------------------------
4352     Proc2  25 26 27  |  0  0 28  | 29  0
4353            30  0  0  | 31 32 33  |  0 34
4354 .ve
4355 
4356    This can be represented as a collection of submatrices as
4357 
4358 .vb
4359       A B C
4360       D E F
4361       G H I
4362 .ve
4363 
4364    Where the submatrices A,B,C are owned by proc0, D,E,F are
4365    owned by proc1, G,H,I are owned by proc2.
4366 
4367    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4368    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4369    The 'M','N' parameters are 8,8, and have the same values on all procs.
4370 
4371    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4372    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4373    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4374    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4375    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4376    matrix and [DF] as another SeqAIJ matrix.
4377 
4378    When d_nz, o_nz parameters are specified, d_nz storage elements are
4379    allocated for every row of the local diagonal submatrix, and o_nz
4380    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4381    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4382    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4383    In this case, the values of d_nz,o_nz are
4384 .vb
4385      proc0 : d_nz = 2, o_nz = 2
4386      proc1 : d_nz = 3, o_nz = 2
4387      proc2 : d_nz = 1, o_nz = 4
4388 .ve
4389    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4390    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4391    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4392    34 values.
4393 
4394    When d_nnz, o_nnz parameters are specified, the storage is specified
4395    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4396    In the above case the values for d_nnz,o_nnz are
4397 .vb
4398      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4399      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4400      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4401 .ve
4402    Here the space allocated is the sum of all the above values, i.e., 34, and
4403    hence the preallocation is perfect.
4404 
4405    Level: intermediate
4406 
4407 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4408           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4409 @*/
4410 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4411 {
4412   PetscErrorCode ierr;
4413   PetscMPIInt    size;
4414 
4415   PetscFunctionBegin;
4416   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4417   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4419   if (size > 1) {
4420     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4421     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4422   } else {
4423     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4424     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4425   }
4426   PetscFunctionReturn(0);
4427 }
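
/*
   Illustrative sketch (hypothetical helper, not part of the PETSc source): creating a
   distributed tridiagonal matrix with MatCreateAIJ(). Each row has at most 3 nonzeros and at
   most 2 of them can fall outside the local diagonal block, so d_nz=3, o_nz=2 is a safe
   (slightly generous) preallocation.
*/
static PetscErrorCode CreateTridiagonal_Example(MPI_Comm comm,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       Istart,Iend,row;

  PetscFunctionBegin;
  ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&Istart,&Iend);CHKERRQ(ierr);
  for (row=Istart; row<Iend; row++) {
    PetscInt    cols[3],ncols = 0;
    PetscScalar vals[3];
    if (row > 0)   {cols[ncols] = row-1; vals[ncols++] = -1.0;}
    cols[ncols] = row; vals[ncols++] = 2.0;
    if (row < N-1) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
    ierr = MatSetValues(*A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}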
4428 
4429 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4430 {
4431   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4432   PetscBool      flg;
4433   PetscErrorCode ierr;
4434 
4435   PetscFunctionBegin;
4436   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4437   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4438   if (Ad)     *Ad     = a->A;
4439   if (Ao)     *Ao     = a->B;
4440   if (colmap) *colmap = a->garray;
4441   PetscFunctionReturn(0);
4442 }
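
/*
   Illustrative usage sketch (hypothetical helper, not part of the PETSc source): inspecting
   the two SeqAIJ blocks returned above; colmap[k] is the global column corresponding to
   local column k of the off-diagonal block Ao.
*/
static PetscErrorCode InspectBlocks_Example(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  const PetscInt *colmap;
  PetscInt       md,nd,mo,no;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ad,&md,&nd);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,&mo,&no);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block %D x %D, off-diagonal block %D x %D\n",md,nd,mo,no);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}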
4443 
4444 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4445 {
4446   PetscErrorCode ierr;
4447   PetscInt       m,N,i,rstart,nnz,Ii;
4448   PetscInt       *indx;
4449   PetscScalar    *values;
4450 
4451   PetscFunctionBegin;
4452   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4453   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4454     PetscInt       *dnz,*onz,sum,bs,cbs;
4455 
4456     if (n == PETSC_DECIDE) {
4457       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4458     }
4459     /* Check sum(n) = N */
4460     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4461     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4462 
4463     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4464     rstart -= m;
4465 
4466     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4467     for (i=0; i<m; i++) {
4468       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4469       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4470       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4471     }
4472 
4473     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4474     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4475     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4476     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4477     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4478     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4479     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4480     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4481   }
4482 
4483   /* numeric phase */
4484   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4485   for (i=0; i<m; i++) {
4486     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4487     Ii   = i + rstart;
4488     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4489     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4490   }
4491   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4492   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4497 {
4498   PetscErrorCode    ierr;
4499   PetscMPIInt       rank;
4500   PetscInt          m,N,i,rstart,nnz;
4501   size_t            len;
4502   const PetscInt    *indx;
4503   PetscViewer       out;
4504   char              *name;
4505   Mat               B;
4506   const PetscScalar *values;
4507 
4508   PetscFunctionBegin;
4509   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4510   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4511   /* Should this be the type of the diagonal block of A? */
4512   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4513   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4514   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4515   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4516   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4517   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4518   for (i=0; i<m; i++) {
4519     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4520     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4521     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4522   }
4523   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4524   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525 
4526   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4527   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4528   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4529   sprintf(name,"%s.%d",outfile,rank);
4530   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4531   ierr = PetscFree(name);CHKERRQ(ierr);
4532   ierr = MatView(B,out);CHKERRQ(ierr);
4533   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4534   ierr = MatDestroy(&B);CHKERRQ(ierr);
4535   PetscFunctionReturn(0);
4536 }
4537 
4538 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4539 {
4540   PetscErrorCode      ierr;
4541   Mat_Merge_SeqsToMPI *merge;
4542   PetscContainer      container;
4543 
4544   PetscFunctionBegin;
4545   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4546   if (container) {
4547     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4548     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4549     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4550     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4551     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4552     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4553     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4554     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4555     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4556     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4557     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4558     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4559     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4560     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4561     ierr = PetscFree(merge);CHKERRQ(ierr);
4562     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4563   }
4564   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4565   PetscFunctionReturn(0);
4566 }
4567 
4568 #include <../src/mat/utils/freespace.h>
4569 #include <petscbt.h>
4570 
4571 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4572 {
4573   PetscErrorCode      ierr;
4574   MPI_Comm            comm;
4575   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4576   PetscMPIInt         size,rank,taga,*len_s;
4577   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4578   PetscInt            proc,m;
4579   PetscInt            **buf_ri,**buf_rj;
4580   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4581   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4582   MPI_Request         *s_waits,*r_waits;
4583   MPI_Status          *status;
4584   MatScalar           *aa=a->a;
4585   MatScalar           **abuf_r,*ba_i;
4586   Mat_Merge_SeqsToMPI *merge;
4587   PetscContainer      container;
4588 
4589   PetscFunctionBegin;
4590   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4591   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4592 
4593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4594   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4595 
4596   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4597   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4598 
4599   bi     = merge->bi;
4600   bj     = merge->bj;
4601   buf_ri = merge->buf_ri;
4602   buf_rj = merge->buf_rj;
4603 
4604   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4605   owners = merge->rowmap->range;
4606   len_s  = merge->len_s;
4607 
4608   /* send and recv matrix values */
4609   /*-----------------------------*/
4610   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4611   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4612 
4613   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4614   for (proc=0,k=0; proc<size; proc++) {
4615     if (!len_s[proc]) continue;
4616     i    = owners[proc];
4617     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4618     k++;
4619   }
4620 
4621   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4622   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4623   ierr = PetscFree(status);CHKERRQ(ierr);
4624 
4625   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4626   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4627 
4628   /* insert mat values of mpimat */
4629   /*----------------------------*/
4630   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4631   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4632 
4633   for (k=0; k<merge->nrecv; k++) {
4634     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4635     nrows       = *(buf_ri_k[k]);
4636     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4637     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4638   }
4639 
4640   /* set values of ba */
4641   m = merge->rowmap->n;
4642   for (i=0; i<m; i++) {
4643     arow = owners[rank] + i;
4644     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4645     bnzi = bi[i+1] - bi[i];
4646     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4647 
4648     /* add local non-zero vals of this proc's seqmat into ba */
4649     anzi   = ai[arow+1] - ai[arow];
4650     aj     = a->j + ai[arow];
4651     aa     = a->a + ai[arow];
4652     nextaj = 0;
4653     for (j=0; nextaj<anzi; j++) {
4654       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4655         ba_i[j] += aa[nextaj++];
4656       }
4657     }
4658 
4659     /* add received vals into ba */
4660     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4661       /* i-th row */
4662       if (i == *nextrow[k]) {
4663         anzi   = *(nextai[k]+1) - *nextai[k];
4664         aj     = buf_rj[k] + *(nextai[k]);
4665         aa     = abuf_r[k] + *(nextai[k]);
4666         nextaj = 0;
4667         for (j=0; nextaj<anzi; j++) {
4668           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4669             ba_i[j] += aa[nextaj++];
4670           }
4671         }
4672         nextrow[k]++; nextai[k]++;
4673       }
4674     }
4675     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4676   }
4677   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4678   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679 
4680   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4681   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4682   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4683   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4684   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4689 {
4690   PetscErrorCode      ierr;
4691   Mat                 B_mpi;
4692   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4693   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4694   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4695   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4696   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4697   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4698   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4699   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4700   MPI_Status          *status;
4701   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4702   PetscBT             lnkbt;
4703   Mat_Merge_SeqsToMPI *merge;
4704   PetscContainer      container;
4705 
4706   PetscFunctionBegin;
4707   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4708 
4709   /* make sure it is a PETSc comm */
4710   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4712   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4713 
4714   ierr = PetscNew(&merge);CHKERRQ(ierr);
4715   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4716 
4717   /* determine row ownership */
4718   /*---------------------------------------------------------*/
4719   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4720   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4721   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4722   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4723   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4724   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4725   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4726 
4727   m      = merge->rowmap->n;
4728   owners = merge->rowmap->range;
4729 
4730   /* determine the number of messages to send, their lengths */
4731   /*---------------------------------------------------------*/
4732   len_s = merge->len_s;
4733 
4734   len          = 0; /* length of buf_si[] */
4735   merge->nsend = 0;
4736   for (proc=0; proc<size; proc++) {
4737     len_si[proc] = 0;
4738     if (proc == rank) {
4739       len_s[proc] = 0;
4740     } else {
4741       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4742       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4743     }
4744     if (len_s[proc]) {
4745       merge->nsend++;
4746       nrows = 0;
4747       for (i=owners[proc]; i<owners[proc+1]; i++) {
4748         if (ai[i+1] > ai[i]) nrows++;
4749       }
4750       len_si[proc] = 2*(nrows+1);
4751       len         += len_si[proc];
4752     }
4753   }
4754 
4755   /* determine the number and length of messages to receive for ij-structure */
4756   /*-------------------------------------------------------------------------*/
4757   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4758   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4759 
4760   /* post the Irecv of j-structure */
4761   /*-------------------------------*/
4762   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4763   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4764 
4765   /* post the Isend of j-structure */
4766   /*--------------------------------*/
4767   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4768 
4769   for (proc=0, k=0; proc<size; proc++) {
4770     if (!len_s[proc]) continue;
4771     i    = owners[proc];
4772     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4773     k++;
4774   }
4775 
4776   /* receives and sends of j-structure are complete */
4777   /*------------------------------------------------*/
4778   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4779   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4780 
4781   /* send and recv i-structure */
4782   /*---------------------------*/
4783   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4784   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4785 
4786   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4787   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4788   for (proc=0,k=0; proc<size; proc++) {
4789     if (!len_s[proc]) continue;
4790     /* form outgoing message for i-structure:
4791          buf_si[0]:                 nrows to be sent
4792                [1:nrows]:           row index (global)
4793                [nrows+1:2*nrows+1]: i-structure index
4794     */
4795     /*-------------------------------------------*/
4796     nrows       = len_si[proc]/2 - 1;
4797     buf_si_i    = buf_si + nrows+1;
4798     buf_si[0]   = nrows;
4799     buf_si_i[0] = 0;
4800     nrows       = 0;
4801     for (i=owners[proc]; i<owners[proc+1]; i++) {
4802       anzi = ai[i+1] - ai[i];
4803       if (anzi) {
4804         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4805         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4806         nrows++;
4807       }
4808     }
4809     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4810     k++;
4811     buf_si += len_si[proc];
4812   }
4813 
4814   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4815   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4816 
4817   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4818   for (i=0; i<merge->nrecv; i++) {
4819     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4820   }
4821 
4822   ierr = PetscFree(len_si);CHKERRQ(ierr);
4823   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4824   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4825   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4826   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4827   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4828   ierr = PetscFree(status);CHKERRQ(ierr);
4829 
4830   /* compute a local seq matrix in each processor */
4831   /*----------------------------------------------*/
4832   /* allocate bi array and free space for accumulating nonzero column info */
4833   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4834   bi[0] = 0;
4835 
4836   /* create and initialize a linked list */
4837   nlnk = N+1;
4838   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4839 
4840   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4841   len  = ai[owners[rank+1]] - ai[owners[rank]];
4842   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4843 
4844   current_space = free_space;
4845 
4846   /* determine symbolic info for each local row */
4847   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4848 
4849   for (k=0; k<merge->nrecv; k++) {
4850     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4851     nrows       = *buf_ri_k[k];
4852     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4853     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4854   }
4855 
4856   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4857   len  = 0;
4858   for (i=0; i<m; i++) {
4859     bnzi = 0;
4860     /* add local non-zero cols of this proc's seqmat into lnk */
4861     arow  = owners[rank] + i;
4862     anzi  = ai[arow+1] - ai[arow];
4863     aj    = a->j + ai[arow];
4864     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4865     bnzi += nlnk;
4866     /* add received col data into lnk */
4867     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4868       if (i == *nextrow[k]) { /* i-th row */
4869         anzi  = *(nextai[k]+1) - *nextai[k];
4870         aj    = buf_rj[k] + *nextai[k];
4871         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4872         bnzi += nlnk;
4873         nextrow[k]++; nextai[k]++;
4874       }
4875     }
4876     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4877 
4878     /* if free space is not available, make more free space */
4879     if (current_space->local_remaining<bnzi) {
4880       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4881       nspacedouble++;
4882     }
4883     /* copy data into free space, then initialize lnk */
4884     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4885     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4886 
4887     current_space->array           += bnzi;
4888     current_space->local_used      += bnzi;
4889     current_space->local_remaining -= bnzi;
4890 
4891     bi[i+1] = bi[i] + bnzi;
4892   }
4893 
4894   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4895 
4896   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4897   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4898   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4899 
4900   /* create symbolic parallel matrix B_mpi */
4901   /*---------------------------------------*/
4902   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4903   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4904   if (n==PETSC_DECIDE) {
4905     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4906   } else {
4907     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4908   }
4909   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4910   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4911   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4912   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4913   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4914 
4915   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4916   B_mpi->assembled    = PETSC_FALSE;
4917   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4918   merge->bi           = bi;
4919   merge->bj           = bj;
4920   merge->buf_ri       = buf_ri;
4921   merge->buf_rj       = buf_rj;
4922   merge->coi          = NULL;
4923   merge->coj          = NULL;
4924   merge->owners_co    = NULL;
4925 
4926   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4927 
4928   /* attach the supporting struct to B_mpi for reuse */
4929   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4930   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4931   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4932   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4933   *mpimat = B_mpi;
4934 
4935   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4936   PetscFunctionReturn(0);
4937 }
4938 
4939 /*@C
4940       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4941                  matrices from each processor
4942 
4943     Collective on MPI_Comm
4944 
4945    Input Parameters:
4946 +    comm - the communicator the parallel matrix will live on
4947 .    seqmat - the input sequential matrix
4948 .    m - number of local rows (or PETSC_DECIDE)
4949 .    n - number of local columns (or PETSC_DECIDE)
4950 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4951 
4952    Output Parameter:
4953 .    mpimat - the parallel matrix generated
4954 
4955     Level: advanced
4956 
4957    Notes:
4958      The dimensions of the sequential matrix in each processor MUST be the same.
4959      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4960      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4961 @*/
4962 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4963 {
4964   PetscErrorCode ierr;
4965   PetscMPIInt    size;
4966 
4967   PetscFunctionBegin;
4968   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4969   if (size == 1) {
4970     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4971     if (scall == MAT_INITIAL_MATRIX) {
4972       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4973     } else {
4974       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4975     }
4976     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4977     PetscFunctionReturn(0);
4978   }
4979   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4980   if (scall == MAT_INITIAL_MATRIX) {
4981     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4982   }
4983   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4984   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4985   PetscFunctionReturn(0);
4986 }
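
/*
   Illustrative usage sketch (hypothetical helper, not part of the PETSc source): each process
   passes a sequential matrix of the same global dimensions; the entries are summed into one
   parallel matrix. A later call with MAT_REUSE_MATRIX repeats only the numeric phase, reusing
   the symbolic data cached on mpimat.
*/
static PetscErrorCode SumSeqContributions_Example(MPI_Comm comm,Mat seqmat,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpimat);CHKERRQ(ierr);
  /* ... update the values of seqmat, keeping the same nonzero pattern ... */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,mpimat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}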
4987 
4988 /*@
4989      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4990           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4991           with MatGetSize().
4992 
4993     Not Collective
4994 
4995    Input Parameters:
4996 +    A - the matrix
4997 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4998 
4999    Output Parameter:
5000 .    A_loc - the local sequential matrix generated
5001 
5002     Level: developer
5003 
5004 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5005 
5006 @*/
5007 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5008 {
5009   PetscErrorCode ierr;
5010   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5011   Mat_SeqAIJ     *mat,*a,*b;
5012   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5013   MatScalar      *aa,*ba,*cam;
5014   PetscScalar    *ca;
5015   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5016   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5017   PetscBool      match;
5018   MPI_Comm       comm;
5019   PetscMPIInt    size;
5020 
5021   PetscFunctionBegin;
5022   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5023   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5024   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5026   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5027 
5028   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5029   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5030   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5031   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5032   aa = a->a; ba = b->a;
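  /* mpimat->A holds the diagonal block of the local rows and mpimat->B the off-diagonal block;
     B stores compressed (local) column indices, and cmap = garray translates them back to
     global column indices */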
5033   if (scall == MAT_INITIAL_MATRIX) {
5034     if (size == 1) {
5035       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
           ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); /* end the event begun above before the early return */
5036       PetscFunctionReturn(0);
5037     }
5038 
5039     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5040     ci[0] = 0;
5041     for (i=0; i<am; i++) {
5042       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5043     }
5044     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5045     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5046     k    = 0;
5047     for (i=0; i<am; i++) {
5048       ncols_o = bi[i+1] - bi[i];
5049       ncols_d = ai[i+1] - ai[i];
5050       /* off-diagonal portion of A (global columns before the diagonal block) */
5051       for (jo=0; jo<ncols_o; jo++) {
5052         col = cmap[*bj];
5053         if (col >= cstart) break;
5054         cj[k]   = col; bj++;
5055         ca[k++] = *ba++;
5056       }
5057       /* diagonal portion of A */
5058       for (j=0; j<ncols_d; j++) {
5059         cj[k]   = cstart + *aj++;
5060         ca[k++] = *aa++;
5061       }
5062       /* off-diagonal portion of A (global columns after the diagonal block) */
5063       for (j=jo; j<ncols_o; j++) {
5064         cj[k]   = cmap[*bj++];
5065         ca[k++] = *ba++;
5066       }
5067     }
5068     /* put together the new matrix */
5069     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5070     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5071     /* Since these are PETSc arrays, change flags to free them as necessary. */
5072     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5073     mat->free_a  = PETSC_TRUE;
5074     mat->free_ij = PETSC_TRUE;
5075     mat->nonew   = 0;
5076   } else if (scall == MAT_REUSE_MATRIX) {
5077     mat = (Mat_SeqAIJ*)(*A_loc)->data;
5078     ci = mat->i; cj = mat->j; cam = mat->a;
5079     for (i=0; i<am; i++) {
5080       /* off-diagonal portion of A (global columns before the diagonal block) */
5081       ncols_o = bi[i+1] - bi[i];
5082       for (jo=0; jo<ncols_o; jo++) {
5083         col = cmap[*bj];
5084         if (col >= cstart) break;
5085         *cam++ = *ba++; bj++;
5086       }
5087       /* diagonal portion of A */
5088       ncols_d = ai[i+1] - ai[i];
5089       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5090       /* off-diagonal portion of A (global columns after the diagonal block) */
5091       for (j=jo; j<ncols_o; j++) {
5092         *cam++ = *ba++; bj++;
5093       }
5094     }
5095   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5096   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 /*@C
5101      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5102 
5103     Not Collective
5104 
5105    Input Parameters:
5106 +    A - the matrix
5107 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5108 -    row, col - index sets of rows and columns to extract (or NULL)
5109 
5110    Output Parameter:
5111 .    A_loc - the local sequential matrix generated
5112 
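   Example usage (a minimal sketch; passing NULL for row and col extracts all local
   rows and the nonzero columns automatically):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
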
5113     Level: developer
5114 
5115 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5116 
5117 @*/
5118 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5119 {
5120   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5121   PetscErrorCode ierr;
5122   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5123   IS             isrowa,iscola;
5124   Mat            *aloc;
5125   PetscBool      match;
5126 
5127   PetscFunctionBegin;
5128   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5129   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5130   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5131   if (!row) {
5132     start = A->rmap->rstart; end = A->rmap->rend;
5133     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5134   } else {
5135     isrowa = *row;
5136   }
5137   if (!col) {
5138     start = A->cmap->rstart;
5139     cmap  = a->garray;
5140     nzA   = a->A->cmap->n;
5141     nzB   = a->B->cmap->n;
5142     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5143     ncols = 0;
5144     for (i=0; i<nzB; i++) {
5145       if (cmap[i] < start) idx[ncols++] = cmap[i];
5146       else break;
5147     }
5148     imark = i;
5149     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5150     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5151     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5152   } else {
5153     iscola = *col;
5154   }
5155   if (scall != MAT_INITIAL_MATRIX) {
5156     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5157     aloc[0] = *A_loc;
5158   }
5159   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5160   if (!col) { /* attach global id of condensed columns */
5161     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5162   }
5163   *A_loc = aloc[0];
5164   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5165   if (!row) {
5166     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5167   }
5168   if (!col) {
5169     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5170   }
5171   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5172   PetscFunctionReturn(0);
5173 }
5174 
5175 /*@C
5176     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5177 
5178     Collective on Mat
5179 
5180    Input Parameters:
5181 +    A,B - the matrices in mpiaij format
5182 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5183 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5184 
5185    Output Parameter:
5186 +    rowb, colb - index sets of rows and columns of B to extract
5187 -    B_seq - the sequential matrix generated
5188 
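   Example usage (a minimal sketch; with MAT_INITIAL_MATRIX the routine creates rowb
   and colb, which the caller eventually destroys, and with MAT_REUSE_MATRIX they must
   be passed back in):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... B changes numerically ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
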
5189     Level: developer
5190 
5191 @*/
5192 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5193 {
5194   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5195   PetscErrorCode ierr;
5196   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5197   IS             isrowb,iscolb;
5198   Mat            *bseq=NULL;
5199 
5200   PetscFunctionBegin;
5201   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5202     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5203   }
5204   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5205 
5206   if (scall == MAT_INITIAL_MATRIX) {
5207     start = A->cmap->rstart;
5208     cmap  = a->garray;
5209     nzA   = a->A->cmap->n;
5210     nzB   = a->B->cmap->n;
5211     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5212     ncols = 0;
5213     for (i=0; i<nzB; i++) {  /* global rows below the local ownership range */
5214       if (cmap[i] < start) idx[ncols++] = cmap[i];
5215       else break;
5216     }
5217     imark = i;
5218     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5219     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows above the local ownership range */
5220     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5221     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5222   } else {
5223     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5224     isrowb  = *rowb; iscolb = *colb;
5225     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5226     bseq[0] = *B_seq;
5227   }
5228   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5229   *B_seq = bseq[0];
5230   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5231   if (!rowb) {
5232     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5233   } else {
5234     *rowb = isrowb;
5235   }
5236   if (!colb) {
5237     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5238   } else {
5239     *colb = iscolb;
5240   }
5241   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5242   PetscFunctionReturn(0);
5243 }
5244 
5245 /*
5246     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5247     of the OFF-DIAGONAL portion of local A
5248 
5249     Collective on Mat
5250 
5251    Input Parameters:
5252 +    A,B - the matrices in mpiaij format
5253 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5254 
5255    Output Parameter:
5256 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5257 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5258 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5259 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5260 
5261     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5262      for this matrix. This is not desirable.
5263 
5264     Level: developer
5265 
5266 */
5267 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5268 {
5269   PetscErrorCode         ierr;
5270   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5271   Mat_SeqAIJ             *b_oth;
5272   VecScatter             ctx;
5273   MPI_Comm               comm;
5274   const PetscMPIInt      *rprocs,*sprocs;
5275   const PetscInt         *srow,*rstarts,*sstarts;
5276   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5277   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5278   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5279   MPI_Request            *rwaits = NULL,*swaits = NULL;
5280   MPI_Status             rstatus;
5281   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5282 
5283   PetscFunctionBegin;
5284   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5286 
5287   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5288     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5289   }
5290   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5291   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5292 
5293   if (size == 1) {
5294     if (startsj_s) *startsj_s = NULL; /* assign through the output pointers; assigning the local parameters had no effect */
5295     if (startsj_r) *startsj_r = NULL;
         if (bufa_ptr)  *bufa_ptr  = NULL;
5296     *B_oth    = NULL;
         ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); /* end the event begun above before the early return */
5297     PetscFunctionReturn(0);
5298   }
5299 
5300   ctx = a->Mvctx;
5301   tag = ((PetscObject)ctx)->tag;
5302 
5303   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5304   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5305   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5306   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5307   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5308   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5309   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5310 
5311   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
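  /* Three communication phases follow: (1) exchange row lengths to build the i-array of B_oth,
     (2) exchange column indices to build the j-array, and (3) exchange the numerical values
     (a-array). With MAT_REUSE_MATRIX the i- and j-arrays are unchanged, so only phase (3) runs. */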
5312   if (scall == MAT_INITIAL_MATRIX) {
5313     /* i-array */
5314     /*---------*/
5315     /*  post receives */
5316     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5317     for (i=0; i<nrecvs; i++) {
5318       rowlen = rvalues + rstarts[i]*rbs;
5319       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5320       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5321     }
5322 
5323     /* pack the outgoing message */
5324     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5325 
5326     sstartsj[0] = 0;
5327     rstartsj[0] = 0;
5328     len         = 0; /* total length of j or a array to be sent */
5329     if (nsends) {
5330       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5331       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5332     }
5333     for (i=0; i<nsends; i++) {
5334       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5335       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5336       for (j=0; j<nrows; j++) {
5337         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5338         for (l=0; l<sbs; l++) {
5339           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5340 
5341           rowlen[j*sbs+l] = ncols;
5342 
5343           len += ncols;
5344           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5345         }
5346         k++;
5347       }
5348       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5349 
5350       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5351     }
5352     /* recvs and sends of i-array are completed */
5353     i = nrecvs;
5354     while (i--) {
5355       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5356     }
5357     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5358     ierr = PetscFree(svalues);CHKERRQ(ierr);
5359 
5360     /* allocate buffers for sending j and a arrays */
5361     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5362     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5363 
5364     /* create i-array of B_oth */
5365     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5366 
5367     b_othi[0] = 0;
5368     len       = 0; /* total length of j or a array to be received */
5369     k         = 0;
5370     for (i=0; i<nrecvs; i++) {
5371       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5372       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5373       for (j=0; j<nrows; j++) {
5374         b_othi[k+1] = b_othi[k] + rowlen[j];
5375         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5376         k++;
5377       }
5378       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5379     }
5380     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5381 
5382     /* allocate space for j and a arrays of B_oth */
5383     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5384     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5385 
5386     /* j-array */
5387     /*---------*/
5388     /*  post receives of j-array */
5389     for (i=0; i<nrecvs; i++) {
5390       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5391       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5392     }
5393 
5394     /* pack the outgoing message j-array */
5395     if (nsends) k = sstarts[0];
5396     for (i=0; i<nsends; i++) {
5397       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5398       bufJ  = bufj+sstartsj[i];
5399       for (j=0; j<nrows; j++) {
5400         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5401         for (ll=0; ll<sbs; ll++) {
5402           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5403           for (l=0; l<ncols; l++) {
5404             *bufJ++ = cols[l];
5405           }
5406           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5407         }
5408       }
5409       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5410     }
5411 
5412     /* recvs and sends of j-array are completed */
5413     i = nrecvs;
5414     while (i--) {
5415       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5416     }
5417     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5418   } else if (scall == MAT_REUSE_MATRIX) {
5419     sstartsj = *startsj_s;
5420     rstartsj = *startsj_r;
5421     bufa     = *bufa_ptr;
5422     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5423     b_otha   = b_oth->a;
5424   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5425 
5426   /* a-array */
5427   /*---------*/
5428   /*  post receives of a-array */
5429   for (i=0; i<nrecvs; i++) {
5430     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5431     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5432   }
5433 
5434   /* pack the outgoing message a-array */
5435   if (nsends) k = sstarts[0];
5436   for (i=0; i<nsends; i++) {
5437     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5438     bufA  = bufa+sstartsj[i];
5439     for (j=0; j<nrows; j++) {
5440       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5441       for (ll=0; ll<sbs; ll++) {
5442         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5443         for (l=0; l<ncols; l++) {
5444           *bufA++ = vals[l];
5445         }
5446         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5447       }
5448     }
5449     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5450   }
5451   /* recvs and sends of a-array are completed */
5452   i = nrecvs;
5453   while (i--) {
5454     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5455   }
5456   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5457   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5458 
5459   if (scall == MAT_INITIAL_MATRIX) {
5460     /* put together the new matrix */
5461     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5462 
5463     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5464     /* Since these are PETSc arrays, change flags to free them as necessary. */
5465     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5466     b_oth->free_a  = PETSC_TRUE;
5467     b_oth->free_ij = PETSC_TRUE;
5468     b_oth->nonew   = 0;
5469 
5470     ierr = PetscFree(bufj);CHKERRQ(ierr);
5471     if (!startsj_s || !bufa_ptr) {
5472       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5473       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the value buffer itself; bufa_ptr may be NULL in this branch */
5474     } else {
5475       *startsj_s = sstartsj;
5476       *startsj_r = rstartsj;
5477       *bufa_ptr  = bufa;
5478     }
5479   }
5480 
5481   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5482   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5483   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5484   PetscFunctionReturn(0);
5485 }
5486 
5487 /*@C
5488   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5489 
5490   Not Collective
5491 
5492   Input Parameters:
5493 . A - The matrix in mpiaij format
5494 
5495   Output Parameter:
5496 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5497 . colmap - A map from global column index to local index into lvec
5498 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5499 
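  Example usage (a minimal sketch; the returned objects are owned by the matrix and
  must not be destroyed by the caller, and the type of colmap depends on whether
  PETSc was configured with ctable support):
.vb
     Vec        lvec;
     VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve
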
5500   Level: developer
5501 
5502 @*/
5503 #if defined(PETSC_USE_CTABLE)
5504 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5505 #else
5506 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5507 #endif
5508 {
5509   Mat_MPIAIJ *a;
5510 
5511   PetscFunctionBegin;
5512   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5513   PetscValidPointer(lvec, 2);
5514   PetscValidPointer(colmap, 3);
5515   PetscValidPointer(multScatter, 4);
5516   a = (Mat_MPIAIJ*) A->data;
5517   if (lvec) *lvec = a->lvec;
5518   if (colmap) *colmap = a->colmap;
5519   if (multScatter) *multScatter = a->Mvctx;
5520   PetscFunctionReturn(0);
5521 }
5522 
5523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5526 #if defined(PETSC_HAVE_MKL_SPARSE)
5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5528 #endif
5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5530 #if defined(PETSC_HAVE_ELEMENTAL)
5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_HYPRE)
5534 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5535 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5536 #endif
5537 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5539 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5540 
5541 /*
5542     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5543 
5544                n                       p                          p
5545         (              )       (              )         (                  )
5546       m (      A       )  *  n (       B      )   =   m (         C        )
5547         (              )       (              )         (                  )
5548 
5549 */
5550 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5551 {
5552   PetscErrorCode ierr;
5553   Mat            At,Bt,Ct;
5554 
5555   PetscFunctionBegin;
5556   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5557   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5558   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5559   ierr = MatDestroy(&At);CHKERRQ(ierr);
5560   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5561   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5562   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5563   PetscFunctionReturn(0);
5564 }
5565 
5566 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5567 {
5568   PetscErrorCode ierr;
5569   PetscInt       m=A->rmap->n,n=B->cmap->n;
5570   Mat            Cmat;
5571 
5572   PetscFunctionBegin;
5573   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5574   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5575   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5576   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5577   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5578   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5579   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5580   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5581 
5582   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5583 
5584   *C = Cmat;
5585   PetscFunctionReturn(0);
5586 }
5587 
5588 /* ----------------------------------------------------------------*/
5589 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5590 {
5591   PetscErrorCode ierr;
5592 
5593   PetscFunctionBegin;
5594   if (scall == MAT_INITIAL_MATRIX) {
5595     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5596     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5597     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5598   }
5599   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5600   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5601   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5602   PetscFunctionReturn(0);
5603 }
5604 
5605 /*MC
5606    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5607 
5608    Options Database Keys:
5609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5610 
5611   Level: beginner
5612 
5613 .seealso: MatCreateAIJ()
5614 M*/
5615 
5616 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5617 {
5618   Mat_MPIAIJ     *b;
5619   PetscErrorCode ierr;
5620   PetscMPIInt    size;
5621 
5622   PetscFunctionBegin;
5623   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5624 
5625   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5626   B->data       = (void*)b;
5627   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5628   B->assembled  = PETSC_FALSE;
5629   B->insertmode = NOT_SET_VALUES;
5630   b->size       = size;
5631 
5632   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5633 
5634   /* build cache for off array entries formed */
5635   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5636 
5637   b->donotstash  = PETSC_FALSE;
5638   b->colmap      = NULL;
5639   b->garray      = NULL;
5640   b->roworiented = PETSC_TRUE;
5641 
5642   /* stuff used for matrix vector multiply */
5643   b->lvec  = NULL;
5644   b->Mvctx = NULL;
5645 
5646   /* stuff for MatGetRow() */
5647   b->rowindices   = NULL;
5648   b->rowvalues    = NULL;
5649   b->getrowactive = PETSC_FALSE;
5650 
5651   /* flexible pointer used in CUSP/CUSPARSE classes */
5652   b->spptr = NULL;
5653 
5654   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5664 #if defined(PETSC_HAVE_MKL_SPARSE)
5665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5666 #endif
5667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5669 #if defined(PETSC_HAVE_ELEMENTAL)
5670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5671 #endif
5672 #if defined(PETSC_HAVE_HYPRE)
5673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5674 #endif
5675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5680 #if defined(PETSC_HAVE_HYPRE)
5681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5682 #endif
5683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5684   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5685   PetscFunctionReturn(0);
5686 }
5687 
5688 /*@C
5689      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5690          and "off-diagonal" parts of the matrix in CSR format.
5691 
5692    Collective on MPI_Comm
5693 
5694    Input Parameters:
5695 +  comm - MPI communicator
5696 .  m - number of local rows (Cannot be PETSC_DECIDE)
5697 .  n - This value should be the same as the local size used in creating the
5698        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5699        it calculated if N is given). For square matrices n is almost always m.
5700 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5701 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5702 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5703 .   j - column indices
5704 .   a - matrix values
5705 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5706 .   oj - column indices
5707 -   oa - matrix values
5708 
5709    Output Parameter:
5710 .   mat - the matrix
5711 
5712    Level: advanced
5713 
5714    Notes:
5715        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5716        must free the arrays once the matrix has been destroyed and not before.
5717 
5718        The i and j indices are 0 based
5719 
5720        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5721 
5722        This sets local rows and cannot be used to set off-processor values.
5723 
5724        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5725        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5726        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5727        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5728        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5729        communication if it is known that only local entries will be set.
5730 
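   Example usage (a schematic sketch; it assumes each process already owns the six
   CSR arrays for its m local rows, named as in the parameter list above):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     /* ... use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     /* only now may the caller free i, j, a, oi, oj, and oa */
.ve
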
5731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5732           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5733 @*/
5734 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5735 {
5736   PetscErrorCode ierr;
5737   Mat_MPIAIJ     *maij;
5738 
5739   PetscFunctionBegin;
5740   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5741   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5742   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5743   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5744   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5745   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5746   maij = (Mat_MPIAIJ*) (*mat)->data;
5747 
5748   (*mat)->preallocated = PETSC_TRUE;
5749 
5750   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5751   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5752 
5753   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5754   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5755 
5756   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5757   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5758   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5759   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5760 
5761   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5762   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5764   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5765   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5766   PetscFunctionReturn(0);
5767 }
5768 
5769 /*
5770     Special version for direct calls from Fortran
5771 */
5772 #include <petsc/private/fortranimpl.h>
5773 
5774 /* Change these macros so they can be used in a void function */
5775 #undef CHKERRQ
5776 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5777 #undef SETERRQ2
5778 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5779 #undef SETERRQ3
5780 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5781 #undef SETERRQ
5782 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5783 
5784 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5785 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5786 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5787 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5788 #else
5789 #endif
5790 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5791 {
5792   Mat            mat  = *mmat;
5793   PetscInt       m    = *mm, n = *mn;
5794   InsertMode     addv = *maddv;
5795   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5796   PetscScalar    value;
5797   PetscErrorCode ierr;
5798 
5799   MatCheckPreallocated(mat,1);
5800   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5801 
5802 #if defined(PETSC_USE_DEBUG)
5803   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5804 #endif
5805   {
5806     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5807     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5808     PetscBool roworiented = aij->roworiented;
5809 
5810     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
5811     Mat        A                 = aij->A;
5812     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5813     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5814     MatScalar  *aa               = a->a;
5815     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5816     Mat        B                 = aij->B;
5817     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5818     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5819     MatScalar  *ba               = b->a;
5820 
5821     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5822     PetscInt  nonew = a->nonew;
5823     MatScalar *ap1,*ap2;
5824 
5825     PetscFunctionBegin;
5826     for (i=0; i<m; i++) {
5827       if (im[i] < 0) continue;
5828 #if defined(PETSC_USE_DEBUG)
5829       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5830 #endif
5831       if (im[i] >= rstart && im[i] < rend) {
5832         row      = im[i] - rstart;
5833         lastcol1 = -1;
5834         rp1      = aj + ai[row];
5835         ap1      = aa + ai[row];
5836         rmax1    = aimax[row];
5837         nrow1    = ailen[row];
5838         low1     = 0;
5839         high1    = nrow1;
5840         lastcol2 = -1;
5841         rp2      = bj + bi[row];
5842         ap2      = ba + bi[row];
5843         rmax2    = bimax[row];
5844         nrow2    = bilen[row];
5845         low2     = 0;
5846         high2    = nrow2;
5847 
5848         for (j=0; j<n; j++) {
5849           if (roworiented) value = v[i*n+j];
5850           else value = v[i+j*m];
5851           if (in[j] >= cstart && in[j] < cend) {
5852             col = in[j] - cstart;
5853             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5854             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5855           } else if (in[j] < 0) continue;
5856 #if defined(PETSC_USE_DEBUG)
5857           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5858           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5859 #endif
5860           else {
5861             if (mat->was_assembled) {
5862               if (!aij->colmap) {
5863                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5864               }
5865 #if defined(PETSC_USE_CTABLE)
5866               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5867               col--;
5868 #else
5869               col = aij->colmap[in[j]] - 1;
5870 #endif
5871               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5872               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5873                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5874                 col  =  in[j];
5875                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5876                 B     = aij->B;
5877                 b     = (Mat_SeqAIJ*)B->data;
5878                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5879                 ba    = b->a; /* must be refreshed before computing ap2 below, since MatDisAssemble_MPIAIJ() replaced B */
5880                 rp2   = bj + bi[row];
5881                 ap2   = ba + bi[row];
5882                 rmax2 = bimax[row];
5883                 nrow2 = bilen[row];
5884                 low2  = 0;
5885                 high2 = nrow2;
5886                 bm    = aij->B->rmap->n;
5887               }
5888             } else col = in[j];
5889             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5890           }
5891         }
5892       } else if (!aij->donotstash) {
5893         if (roworiented) {
5894           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5895         } else {
5896           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5897         }
5898       }
5899     }
5900   }
5901   PetscFunctionReturnVoid();
5902 }
5903