xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8fcddce65efd55a8fe3f87d4c08c15577ce4cbef)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/vecscatterimpl.h>
6 #include <petsc/private/isimpl.h>
7 #include <petscblaslapack.h>
8 #include <petscsf.h>
9 
10 /*MC
11    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12 
13    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
14    and MATMPIAIJ otherwise.  As a result, for single process communicators,
15   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
16   for communicators controlling multiple processes.  It is recommended that you call both of
17   the above preallocation routines for simplicity.
18 
19    Options Database Keys:
20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21 
22   Developer Notes:
23     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to use inodes when
24    enough of them exist.
25 
26   Level: beginner
27 
28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
29 M*/
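
/*
   Illustrative usage sketch (not taken from this file; the sizes and per-row nonzero counts below
   are placeholder assumptions): following the recommendation above, call both preallocation
   routines so the same code works on any communicator size; the call that does not apply to the
   actual matrix type is simply ignored.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used for a single-process communicator
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used for a multi-process communicator
      ... fill with MatSetValues(), then MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); ...
*/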
30 
31 /*MC
32    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
35    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
36    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
42 
43   Level: beginner
44 
45 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
46 M*/
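
/*
   Illustrative sketch (A is assumed to be an existing Mat using the default options prefix):
   the type can be selected from the options database with

      -mat_type aijcrl

   during MatSetFromOptions(), or programmatically with

      ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
*/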
47 
48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
49 {
50   PetscErrorCode ierr;
51   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
52 
53   PetscFunctionBegin;
54   if (mat->A) {
55     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
56     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
62 {
63   PetscErrorCode  ierr;
64   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
65   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
66   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
67   const PetscInt  *ia,*ib;
68   const MatScalar *aa,*bb;
69   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
70   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
71 
72   PetscFunctionBegin;
73   *keptrows = 0;
74   ia        = a->i;
75   ib        = b->i;
76   for (i=0; i<m; i++) {
77     na = ia[i+1] - ia[i];
78     nb = ib[i+1] - ib[i];
79     if (!na && !nb) {
80       cnt++;
81       goto ok1;
82     }
83     aa = a->a + ia[i];
84     for (j=0; j<na; j++) {
85       if (aa[j] != 0.0) goto ok1;
86     }
87     bb = b->a + ib[i];
88     for (j=0; j <nb; j++) {
89       if (bb[j] != 0.0) goto ok1;
90     }
91     cnt++;
92 ok1:;
93   }
94   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
95   if (!n0rows) PetscFunctionReturn(0);
96   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
97   cnt  = 0;
98   for (i=0; i<m; i++) {
99     na = ia[i+1] - ia[i];
100     nb = ib[i+1] - ib[i];
101     if (!na && !nb) continue;
102     aa = a->a + ia[i];
103     for (j=0; j<na;j++) {
104       if (aa[j] != 0.0) {
105         rows[cnt++] = rstart + i;
106         goto ok2;
107       }
108     }
109     bb = b->a + ib[i];
110     for (j=0; j<nb; j++) {
111       if (bb[j] != 0.0) {
112         rows[cnt++] = rstart + i;
113         goto ok2;
114       }
115     }
116 ok2:;
117   }
118   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
123 {
124   PetscErrorCode    ierr;
125   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
126   PetscBool         cong;
127 
128   PetscFunctionBegin;
129   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
130   if (Y->assembled && cong) {
131     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
132   } else {
133     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
139 {
140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
141   PetscErrorCode ierr;
142   PetscInt       i,rstart,nrows,*rows;
143 
144   PetscFunctionBegin;
145   *zrows = NULL;
146   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
147   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
148   for (i=0; i<nrows; i++) rows[i] += rstart;
149   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
154 {
155   PetscErrorCode ierr;
156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
157   PetscInt       i,n,*garray = aij->garray;
158   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
159   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
160   PetscReal      *work;
161 
162   PetscFunctionBegin;
163   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
164   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
165   if (type == NORM_2) {
166     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
167       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
168     }
169     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
170       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
171     }
172   } else if (type == NORM_1) {
173     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
174       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
175     }
176     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
177       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
178     }
179   } else if (type == NORM_INFINITY) {
180     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
181       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
182     }
183     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
184       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
185     }
186 
187   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
188   if (type == NORM_INFINITY) {
189     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
190   } else {
191     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
192   }
193   ierr = PetscFree(work);CHKERRQ(ierr);
194   if (type == NORM_2) {
195     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
196   }
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
201 {
202   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
203   IS              sis,gis;
204   PetscErrorCode  ierr;
205   const PetscInt  *isis,*igis;
206   PetscInt        n,*iis,nsis,ngis,rstart,i;
207 
208   PetscFunctionBegin;
209   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
210   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
211   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
212   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
213   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
214   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
215 
216   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
217   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
218   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
219   n    = ngis + nsis;
220   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
221   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
222   for (i=0; i<n; i++) iis[i] += rstart;
223   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
224 
225   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
226   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
227   ierr = ISDestroy(&sis);CHKERRQ(ierr);
228   ierr = ISDestroy(&gis);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 /*
233     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
234     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
235 
236     Only for square matrices
237 
238     Used by a preconditioner, hence PETSC_EXTERN
239 */
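/*
   Illustrative call sketch (gmat, m, and dmat are placeholder names): gmat is a square MATSEQAIJ
   matrix that is significant only on rank 0 of comm, and m is the number of rows this process is
   to own.  The distributed matrix is created on the first call and only its numerical values are
   refilled on subsequent calls:

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/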
240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
241 {
242   PetscMPIInt    rank,size;
243   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
244   PetscErrorCode ierr;
245   Mat            mat;
246   Mat_SeqAIJ     *gmata;
247   PetscMPIInt    tag;
248   MPI_Status     status;
249   PetscBool      aij;
250   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
251 
252   PetscFunctionBegin;
253   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
255   if (!rank) {
256     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
257     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
258   }
259   if (reuse == MAT_INITIAL_MATRIX) {
260     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
261     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
262     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
263     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
264     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
265     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
266     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
267     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
268     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
269 
270     rowners[0] = 0;
271     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
272     rstart = rowners[rank];
273     rend   = rowners[rank+1];
274     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
275     if (!rank) {
276       gmata = (Mat_SeqAIJ*) gmat->data;
277       /* send row lengths to all processors */
278       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
279       for (i=1; i<size; i++) {
280         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281       }
282       /* determine the number of diagonal and off-diagonal nonzeros in each row */
283       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
284       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
285       jj   = 0;
286       for (i=0; i<m; i++) {
287         for (j=0; j<dlens[i]; j++) {
288           if (gmata->j[jj] < rstart) ld[i]++;
289           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
290           jj++;
291         }
292       }
293       /* send column indices to other processes */
294       for (i=1; i<size; i++) {
295         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
296         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
297         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
298       }
299 
300       /* send numerical values to other processes */
301       for (i=1; i<size; i++) {
302         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
303         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
304       }
305       gmataa = gmata->a;
306       gmataj = gmata->j;
307 
308     } else {
309       /* receive row lengths */
310       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* receive column indices */
312       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
313       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
314       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
315       /* determine the number of diagonal and off-diagonal nonzeros in each row */
316       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
317       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
318       jj   = 0;
319       for (i=0; i<m; i++) {
320         for (j=0; j<dlens[i]; j++) {
321           if (gmataj[jj] < rstart) ld[i]++;
322           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
323           jj++;
324         }
325       }
326       /* receive numerical values */
327       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
328       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
329     }
330     /* set preallocation */
331     for (i=0; i<m; i++) {
332       dlens[i] -= olens[i];
333     }
334     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
335     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
336 
337     for (i=0; i<m; i++) {
338       dlens[i] += olens[i];
339     }
340     cnt = 0;
341     for (i=0; i<m; i++) {
342       row  = rstart + i;
343       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
344       cnt += dlens[i];
345     }
346     if (rank) {
347       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
348     }
349     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
350     ierr = PetscFree(rowners);CHKERRQ(ierr);
351 
352     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
353 
354     *inmat = mat;
355   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
356     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
357     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
358     mat  = *inmat;
359     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
360     if (!rank) {
361       /* send numerical values to other processes */
362       gmata  = (Mat_SeqAIJ*) gmat->data;
363       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
364       gmataa = gmata->a;
365       for (i=1; i<size; i++) {
366         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
367         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
368       }
369       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
370     } else {
371       /* receive numerical values from process 0 */
372       nz   = Ad->nz + Ao->nz;
373       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
374       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
375     }
376     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
377     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
378     ad = Ad->a;
379     ao = Ao->a;
380     if (mat->rmap->n) {
381       i  = 0;
382       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     for (i=1; i<mat->rmap->n; i++) {
386       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
387       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
388     }
389     i--;
390     if (mat->rmap->n) {
391       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
392     }
393     if (rank) {
394       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
395     }
396   }
397   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
402 /*
403   Local utility routine that creates a mapping from the global column
404 number to the local number in the off-diagonal part of the local
405 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
406 a slightly higher hash table cost; without it, it is not scalable (each processor
407 has an order N integer array) but is fast to access.
408 */
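
/*
   Illustrative lookup sketch (gcol and lcol are placeholder names), mirroring how the map is
   consulted elsewhere in this file: translate a global column number gcol into its local index
   lcol in the off-diagonal part, where a result of -1 means the column is not present there.

   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
*/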
409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       n = aij->B->cmap->n,i;
414 
415   PetscFunctionBegin;
416   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
417 #if defined(PETSC_USE_CTABLE)
418   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
419   for (i=0; i<n; i++) {
420     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
421   }
422 #else
423   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
424   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
425   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
426 #endif
427   PetscFunctionReturn(0);
428 }
429 
430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
431 { \
432     if (col <= lastcol1)  low1 = 0;     \
433     else                 high1 = nrow1; \
434     lastcol1 = col;\
435     while (high1-low1 > 5) { \
436       t = (low1+high1)/2; \
437       if (rp1[t] > col) high1 = t; \
438       else              low1  = t; \
439     } \
440       for (_i=low1; _i<high1; _i++) { \
441         if (rp1[_i] > col) break; \
442         if (rp1[_i] == col) { \
443           if (addv == ADD_VALUES) ap1[_i] += value;   \
444           else                    ap1[_i] = value; \
445           goto a_noinsert; \
446         } \
447       }  \
448       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
449       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
450       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
451       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
452       N = nrow1++ - 1; a->nz++; high1++; \
453       /* shift up all the later entries in this row */ \
454       for (ii=N; ii>=_i; ii--) { \
455         rp1[ii+1] = rp1[ii]; \
456         ap1[ii+1] = ap1[ii]; \
457       } \
458       rp1[_i] = col;  \
459       ap1[_i] = value;  \
460       A->nonzerostate++;\
461       a_noinsert: ; \
462       ailen[row] = nrow1; \
463 }
464 
465 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
466   { \
467     if (col <= lastcol2) low2 = 0;                        \
468     else high2 = nrow2;                                   \
469     lastcol2 = col;                                       \
470     while (high2-low2 > 5) {                              \
471       t = (low2+high2)/2;                                 \
472       if (rp2[t] > col) high2 = t;                        \
473       else             low2  = t;                         \
474     }                                                     \
475     for (_i=low2; _i<high2; _i++) {                       \
476       if (rp2[_i] > col) break;                           \
477       if (rp2[_i] == col) {                               \
478         if (addv == ADD_VALUES) ap2[_i] += value;         \
479         else                    ap2[_i] = value;          \
480         goto b_noinsert;                                  \
481       }                                                   \
482     }                                                     \
483     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
484     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
485     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
486     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
487     N = nrow2++ - 1; b->nz++; high2++;                    \
488     /* shift up all the later entries in this row */      \
489     for (ii=N; ii>=_i; ii--) {                            \
490       rp2[ii+1] = rp2[ii];                                \
491       ap2[ii+1] = ap2[ii];                                \
492     }                                                     \
493     rp2[_i] = col;                                        \
494     ap2[_i] = value;                                      \
495     B->nonzerostate++;                                    \
496     b_noinsert: ;                                         \
497     bilen[row] = nrow2;                                   \
498   }
499 
500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
501 {
502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
503   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
504   PetscErrorCode ierr;
505   PetscInt       l,*garray = mat->garray,diag;
506 
507   PetscFunctionBegin;
508   /* code only works for square matrices A */
509 
510   /* find size of row to the left of the diagonal part */
511   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
512   row  = row - diag;
513   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
514     if (garray[b->j[b->i[row]+l]] > diag) break;
515   }
516   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* diagonal part */
519   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
520 
521   /* right of diagonal part */
522   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
527 {
528   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
529   PetscScalar    value;
530   PetscErrorCode ierr;
531   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
532   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
533   PetscBool      roworiented = aij->roworiented;
534 
535   /* Some Variables required in the macro */
536   Mat        A                 = aij->A;
537   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
538   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
539   MatScalar  *aa               = a->a;
540   PetscBool  ignorezeroentries = a->ignorezeroentries;
541   Mat        B                 = aij->B;
542   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
543   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
544   MatScalar  *ba               = b->a;
545 
546   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
547   PetscInt  nonew;
548   MatScalar *ap1,*ap2;
549 
550   PetscFunctionBegin;
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = aj + ai[row];
560       ap1      = aa + ai[row];
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = bj + bi[row];
567       ap2      = ba + bi[row];
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j=0; j<n; j++) {
574         if (roworiented) value = v[i*n+j];
575         else             value = v[i+j*m];
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
580           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!aij->colmap) {
588               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 #if defined(PETSC_USE_CTABLE)
591             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
592             col--;
593 #else
594             col = aij->colmap[in[j]] - 1;
595 #endif
596             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
597               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
598               col  =  in[j];
599               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
600               B     = aij->B;
601               b     = (Mat_SeqAIJ*)B->data;
602               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
603               rp2   = bj + bi[row];
604               ap2   = ba + bi[row];
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0) {
612               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
613                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
614               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
619         }
620       }
621     } else {
622       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
638     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
639     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
640 */
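
/*
   Worked example (illustrative values, assuming cstart = 10 and cend = 20 on this process): with

      mat_i = {0,3,5}
      mat_j = {10,12,25, 3,11}

   row 0 has diagonal-part columns 10 and 12 (stored shifted as aj = {0,2}) and off-diagonal column
   25 (stored in bj as the global index), and row 1 has off-diagonal column 3 and diagonal-part
   column 11 (stored as 1), giving ailen = {2,1} and bilen = {1,1}.
*/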
641 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
642 {
643   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
644   Mat            A           = aij->A; /* diagonal part of the matrix */
645   Mat            B           = aij->B; /* offdiagonal part of the matrix */
646   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
647   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
648   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
649   PetscInt       *ailen      = a->ilen,*aj = a->j;
650   PetscInt       *bilen      = b->ilen,*bj = b->j;
651   PetscInt       am          = aij->A->rmap->n,j;
652   PetscInt       diag_so_far = 0,dnz;
653   PetscInt       offd_so_far = 0,onz;
654 
655   PetscFunctionBegin;
656   /* Iterate over all rows of the matrix */
657   for (j=0; j<am; j++) {
658     dnz = onz = 0;
659     /*  Iterate over all non-zero columns of the current row */
660     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
661       /* If column is in the diagonal */
662       if (mat_j[col] >= cstart && mat_j[col] < cend) {
663         aj[diag_so_far++] = mat_j[col] - cstart;
664         dnz++;
665       } else { /* off-diagonal entries */
666         bj[offd_so_far++] = mat_j[col];
667         onz++;
668       }
669     }
670     ailen[j] = dnz;
671     bilen[j] = onz;
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 /*
677     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
678     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
679     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
680     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
681     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
682 */
683 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
684 {
685   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
686   Mat            A      = aij->A; /* diagonal part of the matrix */
687   Mat            B      = aij->B; /* offdiagonal part of the matrix */
688   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
689   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
690   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
691   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
692   PetscInt       *ailen = a->ilen,*aj = a->j;
693   PetscInt       *bilen = b->ilen,*bj = b->j;
694   PetscInt       am     = aij->A->rmap->n,j;
695   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
696   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
697   PetscScalar    *aa = a->a,*ba = b->a;
698 
699   PetscFunctionBegin;
700   /* Iterate over all rows of the matrix */
701   for (j=0; j<am; j++) {
702     dnz_row = onz_row = 0;
703     rowstart_offd = full_offd_i[j];
704     rowstart_diag = full_diag_i[j];
705     /*  Iterate over all non-zero columns of the current row */
706     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
707       /* If column is in the diagonal */
708       if (mat_j[col] >= cstart && mat_j[col] < cend) {
709         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
710         aa[rowstart_diag+dnz_row] = mat_a[col];
711         dnz_row++;
712       } else { /* off-diagonal entries */
713         bj[rowstart_offd+onz_row] = mat_j[col];
714         ba[rowstart_offd+onz_row] = mat_a[col];
715         onz_row++;
716       }
717     }
718     ailen[j] = dnz_row;
719     bilen[j] = onz_row;
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
725 {
726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
727   PetscErrorCode ierr;
728   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
729   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
730 
731   PetscFunctionBegin;
732   for (i=0; i<m; i++) {
733     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
734     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
735     if (idxm[i] >= rstart && idxm[i] < rend) {
736       row = idxm[i] - rstart;
737       for (j=0; j<n; j++) {
738         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
739         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
740         if (idxn[j] >= cstart && idxn[j] < cend) {
741           col  = idxn[j] - cstart;
742           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
743         } else {
744           if (!aij->colmap) {
745             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
746           }
747 #if defined(PETSC_USE_CTABLE)
748           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
749           col--;
750 #else
751           col = aij->colmap[idxn[j]] - 1;
752 #endif
753           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
754           else {
755             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
756           }
757         }
758       }
759     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
765 
766 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
767 {
768   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
769   PetscErrorCode ierr;
770   PetscInt       nstash,reallocs;
771 
772   PetscFunctionBegin;
773   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
774 
775   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
776   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
777   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
782 {
783   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
784   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
785   PetscErrorCode ierr;
786   PetscMPIInt    n;
787   PetscInt       i,j,rstart,ncols,flg;
788   PetscInt       *row,*col;
789   PetscBool      other_disassembled;
790   PetscScalar    *val;
791 
792   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
793 
794   PetscFunctionBegin;
795   if (!aij->donotstash && !mat->nooffprocentries) {
796     while (1) {
797       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
798       if (!flg) break;
799 
800       for (i=0; i<n; ) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         /* Now assemble all these values with a single function call */
808         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
809 
810         i = j;
811       }
812     }
813     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
814   }
815   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
816   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
817 
818   /* determine if any processor has disassembled; if so we must
819      also disassemble ourselves, in order that we may reassemble. */
820   /*
821      if nonzero structure of submatrix B cannot change then we know that
822      no processor disassembled thus we can skip this stuff
823   */
824   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
825     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
826     if (mat->was_assembled && !other_disassembled) {
827       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
828     }
829   }
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
831     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
832   }
833   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
834   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
835   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
836 
837   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
838 
839   aij->rowvalues = 0;
840 
841   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
842   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
843 
844   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
845   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
846     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
859   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
864 {
865   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
866   PetscInt      *lrows;
867   PetscInt       r, len;
868   PetscBool      cong;
869   PetscErrorCode ierr;
870 
871   PetscFunctionBegin;
872   /* get locally owned rows */
873   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
874   /* fix right hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
880     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
881     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
882     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
883     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
884   }
885   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
886   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
887   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
888   if ((diag != 0.0) && cong) {
889     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
890   } else if (diag != 0.0) {
891     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
892     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
893     for (r = 0; r < len; ++r) {
894       const PetscInt row = lrows[r] + A->rmap->rstart;
895       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
896     }
897     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
898     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
899   } else {
900     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
901   }
902   ierr = PetscFree(lrows);CHKERRQ(ierr);
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
908   }
909   PetscFunctionReturn(0);
910 }
911 
912 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
915   PetscErrorCode    ierr;
916   PetscMPIInt       n = A->rmap->n;
917   PetscInt          i,j,r,m,p = 0,len = 0;
918   PetscInt          *lrows,*owners = A->rmap->range;
919   PetscSFNode       *rrows;
920   PetscSF           sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb,*mask;
923   Vec               xmask,lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
925   const PetscInt    *aj, *ii,*ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx   = rows[r];
935     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
936     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
943   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
944   /* Collect flags for rows to be zeroed */
945   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
946   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
947   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
950   /* zero diagonal part of matrix */
951   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
952   /* handle off diagonal part of matrix */
953   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
954   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
955   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
956   for (i=0; i<len; i++) bb[lrows[i]] = 1;
957   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
958   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
961   if (x) {
962     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
965     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
966   }
967   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
968   /* remove zeroed rows of off diagonal matrix */
969   ii = aij->i;
970   for (i=0; i<len; i++) {
971     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
972   }
973   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
974   if (aij->compressedrow.use) {
975     m    = aij->compressedrow.nrows;
976     ii   = aij->compressedrow.i;
977     ridx = aij->compressedrow.rindex;
978     for (i=0; i<m; i++) {
979       n  = ii[i+1] - ii[i];
980       aj = aij->j + ii[i];
981       aa = aij->a + ii[i];
982 
983       for (j=0; j<n; j++) {
984         if (PetscAbsScalar(mask[*aj])) {
985           if (b) bb[*ridx] -= *aa*xx[*aj];
986           *aa = 0.0;
987         }
988         aa++;
989         aj++;
990       }
991       ridx++;
992     }
993   } else { /* do not use compressed row format */
994     m = l->B->rmap->n;
995     for (i=0; i<m; i++) {
996       n  = ii[i+1] - ii[i];
997       aj = aij->j + ii[i];
998       aa = aij->a + ii[i];
999       for (j=0; j<n; j++) {
1000         if (PetscAbsScalar(mask[*aj])) {
1001           if (b) bb[i] -= *aa*xx[*aj];
1002           *aa = 0.0;
1003         }
1004         aa++;
1005         aj++;
1006       }
1007     }
1008   }
1009   if (x) {
1010     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1011     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1012   }
1013   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1014   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1015   ierr = PetscFree(lrows);CHKERRQ(ierr);
1016 
1017   /* only change matrix nonzero state if pattern was allowed to be changed */
1018   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1019     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1020     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1026 {
1027   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1028   PetscErrorCode ierr;
1029   PetscInt       nt;
1030   VecScatter     Mvctx = a->Mvctx;
1031 
1032   PetscFunctionBegin;
1033   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1034   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1035 
1036   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1038   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1044 {
1045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1046   PetscErrorCode ierr;
1047 
1048   PetscFunctionBegin;
1049   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1054 {
1055   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1056   PetscErrorCode ierr;
1057   VecScatter     Mvctx = a->Mvctx;
1058 
1059   PetscFunctionBegin;
1060   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1061   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1063   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   /* do nondiagonal part */
1075   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1076   /* do local part */
1077   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1078   /* add partial results together */
1079   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1085 {
1086   MPI_Comm       comm;
1087   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1088   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1089   IS             Me,Notme;
1090   PetscErrorCode ierr;
1091   PetscInt       M,N,first,last,*notme,i;
1092   PetscBool      lf;
1093   PetscMPIInt    size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1098   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1099   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1100   if (!*f) PetscFunctionReturn(0);
1101   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1103   if (size == 1) PetscFunctionReturn(0);
1104 
1105   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1106   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1107   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1108   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1109   for (i=0; i<first; i++) notme[i] = i;
1110   for (i=last; i<M; i++) notme[i-last+first] = i;
1111   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1112   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1113   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1114   Aoff = Aoffs[0];
1115   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1116   Boff = Boffs[0];
1117   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1118   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1119   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1120   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1121   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1122   ierr = PetscFree(notme);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1127 {
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   /* do nondiagonal part */
1142   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1143   /* do local part */
1144   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1145   /* add partial results together */
1146   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 /*
1152   This only works correctly for square matrices where the subblock A->A is the
1153    diagonal block
1154 */
1155 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1156 {
1157   PetscErrorCode ierr;
1158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1159 
1160   PetscFunctionBegin;
1161   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1162   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1163   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1168 {
1169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1174   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1179 {
1180   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184 #if defined(PETSC_USE_LOG)
1185   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1186 #endif
1187   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1188   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1189   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1190   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1191 #if defined(PETSC_USE_CTABLE)
1192   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1193 #else
1194   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1195 #endif
1196   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1197   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1198   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1199   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1200   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1201   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1202   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1203 
1204   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1207   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1213 #if defined(PETSC_HAVE_ELEMENTAL)
1214   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1215 #endif
1216 #if defined(PETSC_HAVE_HYPRE)
1217   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1219 #endif
1220   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1226 {
1227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1228   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1229   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1230   PetscErrorCode ierr;
1231   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1232   int            fd;
1233   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1234   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1235   PetscScalar    *column_values;
1236   PetscInt       message_count,flowcontrolcount;
1237   FILE           *file;
1238 
1239   PetscFunctionBegin;
1240   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1241   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1242   nz   = A->nz + B->nz;
1243   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1244   if (!rank) {
1245     header[0] = MAT_FILE_CLASSID;
1246     header[1] = mat->rmap->N;
1247     header[2] = mat->cmap->N;
1248 
1249     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1250     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1251     /* get largest number of rows any processor has */
1252     rlen  = mat->rmap->n;
1253     range = mat->rmap->range;
1254     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1255   } else {
1256     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1257     rlen = mat->rmap->n;
1258   }
1259 
1260   /* load up the local row counts */
1261   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1262   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1263 
1264   /* store the row lengths to the file */
1265   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1266   if (!rank) {
1267     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1268     for (i=1; i<size; i++) {
1269       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1270       rlen = range[i+1] - range[i];
1271       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1272       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1273     }
1274     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1275   } else {
1276     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1277     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1279   }
1280   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1281 
1282   /* load up the local column indices */
1283   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1284   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1286   cnt   = 0;
1287   for (i=0; i<mat->rmap->n; i++) {
1288     for (j=B->i[i]; j<B->i[i+1]; j++) {
1289       if ((col = garray[B->j[j]]) > cstart) break;
1290       column_indices[cnt++] = col;
1291     }
1292     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1293     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1294   }
1295   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1296 
1297   /* store the column indices to the file */
1298   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1299   if (!rank) {
1300     MPI_Status status;
1301     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1302     for (i=1; i<size; i++) {
1303       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1304       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1305       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1306       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1307       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1308     }
1309     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1310   } else {
1311     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1312     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1313     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1314     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1315   }
1316   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1317 
1318   /* load up the local column values */
1319   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1320   cnt  = 0;
1321   for (i=0; i<mat->rmap->n; i++) {
1322     for (j=B->i[i]; j<B->i[i+1]; j++) {
1323       if (garray[B->j[j]] > cstart) break;
1324       column_values[cnt++] = B->a[j];
1325     }
1326     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1327     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1328   }
1329   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1330 
1331   /* store the column values to the file */
1332   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1333   if (!rank) {
1334     MPI_Status status;
1335     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1336     for (i=1; i<size; i++) {
1337       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1338       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1339       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1340       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1342     }
1343     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1344   } else {
1345     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1346     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1348     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1349   }
1350   ierr = PetscFree(column_values);CHKERRQ(ierr);
1351 
1352   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1353   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #include <petscdraw.h>
1358 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1359 {
1360   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1361   PetscErrorCode    ierr;
1362   PetscMPIInt       rank = aij->rank,size = aij->size;
1363   PetscBool         isdraw,iascii,isbinary;
1364   PetscViewer       sviewer;
1365   PetscViewerFormat format;
1366 
1367   PetscFunctionBegin;
1368   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1369   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1370   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1371   if (iascii) {
1372     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1373     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1374       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1375       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1376       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1377       for (i=0; i<(PetscInt)size; i++) {
1378         nmax = PetscMax(nmax,nz[i]);
1379         nmin = PetscMin(nmin,nz[i]);
1380         navg += nz[i];
1381       }
1382       ierr = PetscFree(nz);CHKERRQ(ierr);
1383       navg = navg/size;
1384       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1385       PetscFunctionReturn(0);
1386     }
1387     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1388     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1389       MatInfo   info;
1390       PetscBool inodes;
1391 
1392       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1393       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1394       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1395       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1396       if (!inodes) {
1397         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1398                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1399       } else {
1400         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1401                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1402       }
1403       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1404       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1405       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1406       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1407       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1408       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1409       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1410       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1411       PetscFunctionReturn(0);
1412     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1413       PetscInt inodecount,inodelimit,*inodes;
1414       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1415       if (inodes) {
1416         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1417       } else {
1418         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1419       }
1420       PetscFunctionReturn(0);
1421     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1422       PetscFunctionReturn(0);
1423     }
1424   } else if (isbinary) {
1425     if (size == 1) {
1426       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1427       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1428     } else {
1429       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1430     }
1431     PetscFunctionReturn(0);
1432   } else if (isdraw) {
1433     PetscDraw draw;
1434     PetscBool isnull;
1435     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1436     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1437     if (isnull) PetscFunctionReturn(0);
1438   }
1439 
1440   {
1441     /* assemble the entire matrix onto first processor. */
1442     Mat        A;
1443     Mat_SeqAIJ *Aloc;
1444     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1445     MatScalar  *a;
1446 
1447     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1448     if (!rank) {
1449       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1450     } else {
1451       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1452     }
1453     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1454     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1455     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1456     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1457     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1458 
1459     /* copy over the A part */
1460     Aloc = (Mat_SeqAIJ*)aij->A->data;
1461     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1462     row  = mat->rmap->rstart;
1463     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1464     for (i=0; i<m; i++) {
1465       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1466       row++;
1467       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1468     }
1469     aj = Aloc->j;
1470     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1471 
1472     /* copy over the B part */
1473     Aloc = (Mat_SeqAIJ*)aij->B->data;
1474     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475     row  = mat->rmap->rstart;
1476     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1477     ct   = cols;
1478     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1479     for (i=0; i<m; i++) {
1480       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1481       row++;
1482       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1483     }
1484     ierr = PetscFree(ct);CHKERRQ(ierr);
1485     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1486     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1487     /*
1488        Every process has to participate in these viewing calls since the graphics waits are
1489        synchronized across all processes that share the PetscDraw object
1490     */
1491     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1492     if (!rank) {
1493       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1495     }
1496     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1497     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1498     ierr = MatDestroy(&A);CHKERRQ(ierr);
1499   }
1500   PetscFunctionReturn(0);
1501 }
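
/*
   Illustrative usage sketch (not part of this implementation): the PETSC_VIEWER_LOAD_BALANCE
   branch above can be reached from application code by pushing that format onto an ASCII
   viewer before calling MatView(). The helper name ViewLoadBalance and the assembled
   MATMPIAIJ matrix A are hypothetical.

     PetscErrorCode ViewLoadBalance(Mat A)
     {
       PetscErrorCode ierr;
       PetscViewer    viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A));

       PetscFunctionBegin;
       ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/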
1502 
1503 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1504 {
1505   PetscErrorCode ierr;
1506   PetscBool      iascii,isdraw,issocket,isbinary;
1507 
1508   PetscFunctionBegin;
1509   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1510   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1511   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1512   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1513   if (iascii || isdraw || isbinary || issocket) {
1514     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1515   }
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1520 {
1521   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1522   PetscErrorCode ierr;
1523   Vec            bb1 = 0;
1524   PetscBool      hasop;
1525 
1526   PetscFunctionBegin;
1527   if (flag == SOR_APPLY_UPPER) {
1528     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1529     PetscFunctionReturn(0);
1530   }
1531 
1532   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1533     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1534   }
1535 
1536   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1537     if (flag & SOR_ZERO_INITIAL_GUESS) {
1538       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1539       its--;
1540     }
1541 
1542     while (its--) {
1543       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545 
1546       /* update rhs: bb1 = bb - B*x */
1547       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1548       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1549 
1550       /* local sweep */
1551       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1552     }
1553   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1554     if (flag & SOR_ZERO_INITIAL_GUESS) {
1555       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556       its--;
1557     }
1558     while (its--) {
1559       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1560       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561 
1562       /* update rhs: bb1 = bb - B*x */
1563       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1564       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1565 
1566       /* local sweep */
1567       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1568     }
1569   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1570     if (flag & SOR_ZERO_INITIAL_GUESS) {
1571       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1572       its--;
1573     }
1574     while (its--) {
1575       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1576       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577 
1578       /* update rhs: bb1 = bb - B*x */
1579       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1580       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1581 
1582       /* local sweep */
1583       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1584     }
1585   } else if (flag & SOR_EISENSTAT) {
1586     Vec xx1;
1587 
1588     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1589     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1590 
1591     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593     if (!mat->diag) {
1594       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1595       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1596     }
1597     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1598     if (hasop) {
1599       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1600     } else {
1601       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1602     }
1603     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1604 
1605     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1606 
1607     /* local sweep */
1608     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1609     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1610     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1611   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1612 
1613   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1614 
1615   matin->factorerrortype = mat->A->factorerrortype;
1616   PetscFunctionReturn(0);
1617 }
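
/*
   Illustrative usage sketch (not part of this implementation): MatSOR_MPIAIJ() above only
   supports the local sweeps and the Eisenstat variant, and is normally reached through the
   PCSOR preconditioner. The helper name SolveWithLocalSOR and the caller-provided A, b and x
   are hypothetical; petscksp.h is needed for the KSP/PC interfaces.

     #include <petscksp.h>

     PetscErrorCode SolveWithLocalSOR(Mat A,Vec b,Vec x)
     {
       PetscErrorCode ierr;
       KSP            ksp;
       PC             pc;

       PetscFunctionBegin;
       ierr = KSPCreate(PetscObjectComm((PetscObject)A),&ksp);CHKERRQ(ierr);
       ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
       ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
       ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
       ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/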
1618 
1619 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1620 {
1621   Mat            aA,aB,Aperm;
1622   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1623   PetscScalar    *aa,*ba;
1624   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1625   PetscSF        rowsf,sf;
1626   IS             parcolp = NULL;
1627   PetscBool      done;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1632   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1633   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1634   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1635 
1636   /* Invert row permutation to find out where my rows should go */
1637   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1638   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1639   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1641   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1642   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1643 
1644   /* Invert column permutation to find out where my columns should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1648   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1649   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1651   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1652 
1653   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1654   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1655   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1656 
1657   /* Find out where my gcols should go */
1658   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1659   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1660   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1661   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1662   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1663   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1664   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1665   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1666 
1667   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1668   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1669   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1670   for (i=0; i<m; i++) {
1671     PetscInt row = rdest[i],rowner;
1672     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1673     for (j=ai[i]; j<ai[i+1]; j++) {
1674       PetscInt cowner,col = cdest[aj[j]];
1675       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1676       if (rowner == cowner) dnnz[i]++;
1677       else onnz[i]++;
1678     }
1679     for (j=bi[i]; j<bi[i+1]; j++) {
1680       PetscInt cowner,col = gcdest[bj[j]];
1681       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1682       if (rowner == cowner) dnnz[i]++;
1683       else onnz[i]++;
1684     }
1685   }
1686   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1687   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1688   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1689   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1690   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1691 
1692   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1693   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1694   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1695   for (i=0; i<m; i++) {
1696     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1697     PetscInt j0,rowlen;
1698     rowlen = ai[i+1] - ai[i];
1699     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m (the length of the repurposed scratch arrays), so insert the values in batches of at most m */
1700       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1701       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1702     }
1703     rowlen = bi[i+1] - bi[i];
1704     for (j0=j=0; j<rowlen; j0=j) {
1705       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1706       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1707     }
1708   }
1709   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1710   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1711   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1712   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1713   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1714   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1715   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1716   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1717   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1718   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1719   *B = Aperm;
1720   PetscFunctionReturn(0);
1721 }
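
/*
   Illustrative usage sketch (not part of this implementation): MatPermute() expects parallel
   index sets giving the destination of every global row and column. The identity permutation
   below only shows the calling sequence; the helper name PermuteIdentity is hypothetical and
   a real application would build rowp/colp from an actual reordering.

     PetscErrorCode PermuteIdentity(Mat A,Mat *B)
     {
       PetscErrorCode ierr;
       PetscInt       rstart,rend,cstart,cend;
       IS             rowp,colp;

       PetscFunctionBegin;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
       ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
       ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
       ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
       ierr = ISDestroy(&rowp);CHKERRQ(ierr);
       ierr = ISDestroy(&colp);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/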
1722 
1723 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1724 {
1725   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1726   PetscErrorCode ierr;
1727 
1728   PetscFunctionBegin;
1729   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1730   if (ghosts) *ghosts = aij->garray;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1735 {
1736   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1737   Mat            A    = mat->A,B = mat->B;
1738   PetscErrorCode ierr;
1739   PetscReal      isend[5],irecv[5];
1740 
1741   PetscFunctionBegin;
1742   info->block_size = 1.0;
1743   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1744 
1745   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1746   isend[3] = info->memory;  isend[4] = info->mallocs;
1747 
1748   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1749 
1750   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1751   isend[3] += info->memory;  isend[4] += info->mallocs;
1752   if (flag == MAT_LOCAL) {
1753     info->nz_used      = isend[0];
1754     info->nz_allocated = isend[1];
1755     info->nz_unneeded  = isend[2];
1756     info->memory       = isend[3];
1757     info->mallocs      = isend[4];
1758   } else if (flag == MAT_GLOBAL_MAX) {
1759     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1760 
1761     info->nz_used      = irecv[0];
1762     info->nz_allocated = irecv[1];
1763     info->nz_unneeded  = irecv[2];
1764     info->memory       = irecv[3];
1765     info->mallocs      = irecv[4];
1766   } else if (flag == MAT_GLOBAL_SUM) {
1767     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1768 
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   }
1775   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1776   info->fill_ratio_needed = 0;
1777   info->factor_mallocs    = 0;
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1782 {
1783   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1784   PetscErrorCode ierr;
1785 
1786   PetscFunctionBegin;
1787   switch (op) {
1788   case MAT_NEW_NONZERO_LOCATIONS:
1789   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1790   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1791   case MAT_KEEP_NONZERO_PATTERN:
1792   case MAT_NEW_NONZERO_LOCATION_ERR:
1793   case MAT_USE_INODES:
1794   case MAT_IGNORE_ZERO_ENTRIES:
1795     MatCheckPreallocated(A,1);
1796     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1797     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1798     break;
1799   case MAT_ROW_ORIENTED:
1800     MatCheckPreallocated(A,1);
1801     a->roworiented = flg;
1802 
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1805     break;
1806   case MAT_NEW_DIAGONALS:
1807     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1808     break;
1809   case MAT_IGNORE_OFF_PROC_ENTRIES:
1810     a->donotstash = flg;
1811     break;
1812   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1813   case MAT_SPD:
1814   case MAT_SYMMETRIC:
1815   case MAT_STRUCTURALLY_SYMMETRIC:
1816   case MAT_HERMITIAN:
1817   case MAT_SYMMETRY_ETERNAL:
1818     break;
1819   case MAT_SUBMAT_SINGLEIS:
1820     A->submat_singleis = flg;
1821     break;
1822   case MAT_STRUCTURE_ONLY:
1823     /* The option is handled directly by MatSetOption() */
1824     break;
1825   default:
1826     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1827   }
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1834   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1835   PetscErrorCode ierr;
1836   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1837   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1838   PetscInt       *cmap,*idx_p;
1839 
1840   PetscFunctionBegin;
1841   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1842   mat->getrowactive = PETSC_TRUE;
1843 
1844   if (!mat->rowvalues && (idx || v)) {
1845     /*
1846         allocate enough space to hold information from the longest row.
1847     */
1848     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1849     PetscInt   max = 1,tmp;
1850     for (i=0; i<matin->rmap->n; i++) {
1851       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1852       if (max < tmp) max = tmp;
1853     }
1854     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1855   }
1856 
1857   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1858   lrow = row - rstart;
1859 
1860   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1861   if (!v)   {pvA = 0; pvB = 0;}
1862   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1863   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1864   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1865   nztot = nzA + nzB;
1866 
1867   cmap = mat->garray;
1868   if (v  || idx) {
1869     if (nztot) {
1870       /* Sort by increasing column numbers, assuming A and B already sorted */
1871       PetscInt imark = -1;
1872       if (v) {
1873         *v = v_p = mat->rowvalues;
1874         for (i=0; i<nzB; i++) {
1875           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1876           else break;
1877         }
1878         imark = i;
1879         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1880         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1881       }
1882       if (idx) {
1883         *idx = idx_p = mat->rowindices;
1884         if (imark > -1) {
1885           for (i=0; i<imark; i++) {
1886             idx_p[i] = cmap[cworkB[i]];
1887           }
1888         } else {
1889           for (i=0; i<nzB; i++) {
1890             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1891             else break;
1892           }
1893           imark = i;
1894         }
1895         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1896         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1897       }
1898     } else {
1899       if (idx) *idx = 0;
1900       if (v)   *v   = 0;
1901     }
1902   }
1903   *nz  = nztot;
1904   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1905   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
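
/*
   Illustrative usage sketch (not part of this implementation): as the range check above
   enforces, MatGetRow() on a MATMPIAIJ matrix may only be called for locally owned rows; the
   returned column indices are global and sorted, merging the diagonal and off-diagonal parts.
   The helper name CountLocalNonzeros is hypothetical.

     PetscErrorCode CountLocalNonzeros(Mat A,PetscInt *nztotal)
     {
       PetscErrorCode    ierr;
       PetscInt          row,rstart,rend,nz;
       const PetscInt    *cols;
       const PetscScalar *vals;

       PetscFunctionBegin;
       *nztotal = 0;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
         *nztotal += nz;
         ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
       PetscFunctionReturn(0);
     }
*/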
1908 
1909 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1910 {
1911   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1912 
1913   PetscFunctionBegin;
1914   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1915   aij->getrowactive = PETSC_FALSE;
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1920 {
1921   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1922   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1923   PetscErrorCode ierr;
1924   PetscInt       i,j,cstart = mat->cmap->rstart;
1925   PetscReal      sum = 0.0;
1926   MatScalar      *v;
1927 
1928   PetscFunctionBegin;
1929   if (aij->size == 1) {
1930     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1931   } else {
1932     if (type == NORM_FROBENIUS) {
1933       v = amat->a;
1934       for (i=0; i<amat->nz; i++) {
1935         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1936       }
1937       v = bmat->a;
1938       for (i=0; i<bmat->nz; i++) {
1939         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1940       }
1941       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1942       *norm = PetscSqrtReal(*norm);
1943       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1944     } else if (type == NORM_1) { /* max column norm */
1945       PetscReal *tmp,*tmp2;
1946       PetscInt  *jj,*garray = aij->garray;
1947       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1948       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1949       *norm = 0.0;
1950       v     = amat->a; jj = amat->j;
1951       for (j=0; j<amat->nz; j++) {
1952         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1953       }
1954       v = bmat->a; jj = bmat->j;
1955       for (j=0; j<bmat->nz; j++) {
1956         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1957       }
1958       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1959       for (j=0; j<mat->cmap->N; j++) {
1960         if (tmp2[j] > *norm) *norm = tmp2[j];
1961       }
1962       ierr = PetscFree(tmp);CHKERRQ(ierr);
1963       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1964       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1965     } else if (type == NORM_INFINITY) { /* max row norm */
1966       PetscReal ntemp = 0.0;
1967       for (j=0; j<aij->A->rmap->n; j++) {
1968         v   = amat->a + amat->i[j];
1969         sum = 0.0;
1970         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1971           sum += PetscAbsScalar(*v); v++;
1972         }
1973         v = bmat->a + bmat->i[j];
1974         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1975           sum += PetscAbsScalar(*v); v++;
1976         }
1977         if (sum > ntemp) ntemp = sum;
1978       }
1979       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1980       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1981     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1982   }
1983   PetscFunctionReturn(0);
1984 }
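
/*
   Illustrative usage sketch (not part of this implementation): the routine above supports
   NORM_1, NORM_FROBENIUS and NORM_INFINITY for MATMPIAIJ; NORM_2 is rejected, as the error
   branch indicates. The helper name PrintMatNorms is hypothetical.

     PetscErrorCode PrintMatNorms(Mat A)
     {
       PetscErrorCode ierr;
       PetscReal      n1,nf,ninf;

       PetscFunctionBegin;
       ierr = MatNorm(A,NORM_1,&n1);CHKERRQ(ierr);
       ierr = MatNorm(A,NORM_FROBENIUS,&nf);CHKERRQ(ierr);
       ierr = MatNorm(A,NORM_INFINITY,&ninf);CHKERRQ(ierr);
       ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"norm_1 %g norm_frob %g norm_inf %g\n",(double)n1,(double)nf,(double)ninf);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/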
1985 
1986 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1987 {
1988   Mat_MPIAIJ     *a    =(Mat_MPIAIJ*)A->data,*b;
1989   Mat_SeqAIJ     *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1990   PetscInt       M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
1991   PetscErrorCode ierr;
1992   Mat            B,A_diag,*B_diag;
1993   MatScalar      *array;
1994 
1995   PetscFunctionBegin;
1996   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1997   ai = Aloc->i; aj = Aloc->j;
1998   bi = Bloc->i; bj = Bloc->j;
1999   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2000     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2001     PetscSFNode          *oloc;
2002     PETSC_UNUSED PetscSF sf;
2003 
2004     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2005     /* compute d_nnz for preallocation */
2006     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2007     for (i=0; i<ai[ma]; i++) {
2008       d_nnz[aj[i]]++;
2009     }
2010     /* compute local off-diagonal contributions */
2011     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2012     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2013     /* map those to global */
2014     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2015     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2016     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2017     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2018     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2019     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2020     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2021 
2022     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2023     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2024     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2025     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2026     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2027     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2028   } else {
2029     B    = *matout;
2030     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2031   }
2032 
2033   b           = (Mat_MPIAIJ*)B->data;
2034   A_diag      = a->A;
2035   B_diag      = &b->A;
2036   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2037   A_diag_ncol = A_diag->cmap->N;
2038   B_diag_ilen = sub_B_diag->ilen;
2039   B_diag_i    = sub_B_diag->i;
2040 
2041   /* Set ilen for diagonal of B */
2042   for (i=0; i<A_diag_ncol; i++) {
2043     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2044   }
2045 
2046   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2047   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2048   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2049 
2050   /* copy over the B part */
2051   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2052   array = Bloc->a;
2053   row   = A->rmap->rstart;
2054   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2055   cols_tmp = cols;
2056   for (i=0; i<mb; i++) {
2057     ncol = bi[i+1]-bi[i];
2058     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2059     row++;
2060     array += ncol; cols_tmp += ncol;
2061   }
2062   ierr = PetscFree(cols);CHKERRQ(ierr);
2063 
2064   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2067     *matout = B;
2068   } else {
2069     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2075 {
2076   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2077   Mat            a    = aij->A,b = aij->B;
2078   PetscErrorCode ierr;
2079   PetscInt       s1,s2,s3;
2080 
2081   PetscFunctionBegin;
2082   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2083   if (rr) {
2084     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2085     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2086     /* Overlap communication with computation. */
2087     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2088   }
2089   if (ll) {
2090     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2091     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2092     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2093   }
2094   /* scale  the diagonal block */
2095   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2096 
2097   if (rr) {
2098     /* Do a scatter end and then right scale the off-diagonal block */
2099     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2106 {
2107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2116 {
2117   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2118   Mat            a,b,c,d;
2119   PetscBool      flg;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   a = matA->A; b = matA->B;
2124   c = matB->A; d = matB->B;
2125 
2126   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2127   if (flg) {
2128     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2129   }
2130   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2135 {
2136   PetscErrorCode ierr;
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2139 
2140   PetscFunctionBegin;
2141   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2142   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2143     /* because of the column compression in the off-process part of the matrix a->B,
2144        the number of columns in a->B and b->B may be different, hence we cannot call
2145        MatCopy() directly on the two parts. If need be, a more efficient copy than
2146        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2147        then copying the submatrices */
2148     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2149   } else {
2150     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2151     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2152   }
2153   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2158 {
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /*
2167    Computes the number of nonzeros per row needed for preallocation when X and Y
2168    have different nonzero structure.
2169 */
2170 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2171 {
2172   PetscInt       i,j,k,nzx,nzy;
2173 
2174   PetscFunctionBegin;
2175   /* Set the number of nonzeros in the new matrix */
2176   for (i=0; i<m; i++) {
2177     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2178     nzx = xi[i+1] - xi[i];
2179     nzy = yi[i+1] - yi[i];
2180     nnz[i] = 0;
2181     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2182       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2183       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2184       nnz[i]++;
2185     }
2186     for (; k<nzy; k++) nnz[i]++;
2187   }
2188   PetscFunctionReturn(0);
2189 }
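
/*
   Worked example for the counting loop above (values are illustrative): if row i of X has
   global columns {0,3,7} and row i of Y has global columns {3,5}, the merged row of X+Y has
   columns {0,3,5,7}, so nnz[i] = 4; column 3 is counted once via the "skip duplicate" branch,
   0 and 7 come from X only, and 5 is picked up by the trailing "catch up" loop over Y.
*/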
2190 
2191 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2192 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2193 {
2194   PetscErrorCode ierr;
2195   PetscInt       m = Y->rmap->N;
2196   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2197   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2205 {
2206   PetscErrorCode ierr;
2207   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2208   PetscBLASInt   bnz,one=1;
2209   Mat_SeqAIJ     *x,*y;
2210 
2211   PetscFunctionBegin;
2212   if (str == SAME_NONZERO_PATTERN) {
2213     PetscScalar alpha = a;
2214     x    = (Mat_SeqAIJ*)xx->A->data;
2215     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2216     y    = (Mat_SeqAIJ*)yy->A->data;
2217     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2218     x    = (Mat_SeqAIJ*)xx->B->data;
2219     y    = (Mat_SeqAIJ*)yy->B->data;
2220     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2221     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2222     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2223   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2224     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2225   } else {
2226     Mat      B;
2227     PetscInt *nnz_d,*nnz_o;
2228     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2229     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2230     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2231     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2232     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2233     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2234     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2235     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2236     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2237     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2238     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2239     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2240     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2241     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2247 
2248 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2249 {
2250 #if defined(PETSC_USE_COMPLEX)
2251   PetscErrorCode ierr;
2252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2253 
2254   PetscFunctionBegin;
2255   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2256   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2257 #else
2258   PetscFunctionBegin;
2259 #endif
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2264 {
2265   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2270   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2275 {
2276   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2277   PetscErrorCode ierr;
2278 
2279   PetscFunctionBegin;
2280   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2281   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2286 {
2287   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2288   PetscErrorCode ierr;
2289   PetscInt       i,*idxb = 0;
2290   PetscScalar    *va,*vb;
2291   Vec            vtmp;
2292 
2293   PetscFunctionBegin;
2294   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2295   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2296   if (idx) {
2297     for (i=0; i<A->rmap->n; i++) {
2298       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2299     }
2300   }
2301 
2302   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2303   if (idx) {
2304     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2305   }
2306   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2307   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2308 
2309   for (i=0; i<A->rmap->n; i++) {
2310     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2311       va[i] = vb[i];
2312       if (idx) idx[i] = a->garray[idxb[i]];
2313     }
2314   }
2315 
2316   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2317   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2318   ierr = PetscFree(idxb);CHKERRQ(ierr);
2319   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,*idxb = 0;
2328   PetscScalar    *va,*vb;
2329   Vec            vtmp;
2330 
2331   PetscFunctionBegin;
2332   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2333   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2334   if (idx) {
2335     for (i=0; i<A->rmap->n; i++) { /* loop over locally owned rows, matching MatGetRowMaxAbs_MPIAIJ() */
2336       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2337     }
2338   }
2339 
2340   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2341   if (idx) {
2342     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2343   }
2344   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2345   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2346 
2347   for (i=0; i<A->rmap->n; i++) {
2348     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2349       va[i] = vb[i];
2350       if (idx) idx[i] = a->garray[idxb[i]];
2351     }
2352   }
2353 
2354   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2355   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2356   ierr = PetscFree(idxb);CHKERRQ(ierr);
2357   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2362 {
2363   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2364   PetscInt       n      = A->rmap->n;
2365   PetscInt       cstart = A->cmap->rstart;
2366   PetscInt       *cmap  = mat->garray;
2367   PetscInt       *diagIdx, *offdiagIdx;
2368   Vec            diagV, offdiagV;
2369   PetscScalar    *a, *diagA, *offdiagA;
2370   PetscInt       r;
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2375   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2376   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2377   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2378   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2379   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2380   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2381   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2382   for (r = 0; r < n; ++r) {
2383     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2384       a[r]   = diagA[r];
2385       idx[r] = cstart + diagIdx[r];
2386     } else {
2387       a[r]   = offdiagA[r];
2388       idx[r] = cmap[offdiagIdx[r]];
2389     }
2390   }
2391   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2392   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2393   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2394   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2395   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2396   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2401 {
2402   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2403   PetscInt       n      = A->rmap->n;
2404   PetscInt       cstart = A->cmap->rstart;
2405   PetscInt       *cmap  = mat->garray;
2406   PetscInt       *diagIdx, *offdiagIdx;
2407   Vec            diagV, offdiagV;
2408   PetscScalar    *a, *diagA, *offdiagA;
2409   PetscInt       r;
2410   PetscErrorCode ierr;
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2414   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2415   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2416   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2417   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2418   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2419   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2420   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2421   for (r = 0; r < n; ++r) {
2422     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2423       a[r]   = diagA[r];
2424       idx[r] = cstart + diagIdx[r];
2425     } else {
2426       a[r]   = offdiagA[r];
2427       idx[r] = cmap[offdiagIdx[r]];
2428     }
2429   }
2430   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2431   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2433   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2434   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2435   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2440 {
2441   PetscErrorCode ierr;
2442   Mat            *dummy;
2443 
2444   PetscFunctionBegin;
2445   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2446   *newmat = *dummy;
2447   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2452 {
2453   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2454   PetscErrorCode ierr;
2455 
2456   PetscFunctionBegin;
2457   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2458   A->factorerrortype = a->A->factorerrortype;
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2463 {
2464   PetscErrorCode ierr;
2465   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2469   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2470   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2471   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2476 {
2477   PetscFunctionBegin;
2478   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2479   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 /*@
2484    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2485 
2486    Collective on Mat
2487 
2488    Input Parameters:
2489 +    A - the matrix
2490 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2491 
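   Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and is/nis an
   array of index sets prepared by the caller for MatIncreaseOverlap()):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,1);CHKERRQ(ierr);
.ve
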
2492  Level: advanced
2493 
2494 @*/
2495 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2496 {
2497   PetscErrorCode       ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2505 {
2506   PetscErrorCode       ierr;
2507   PetscBool            sc = PETSC_FALSE,flg;
2508 
2509   PetscFunctionBegin;
2510   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2511   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2512   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2513   if (flg) {
2514     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2515   }
2516   ierr = PetscOptionsTail();CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2521 {
2522   PetscErrorCode ierr;
2523   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2524   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2525 
2526   PetscFunctionBegin;
2527   if (!Y->preallocated) {
2528     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2529   } else if (!aij->nz) {
2530     PetscInt nonew = aij->nonew;
2531     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2532     aij->nonew = nonew;
2533   }
2534   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2539 {
2540   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2541   PetscErrorCode ierr;
2542 
2543   PetscFunctionBegin;
2544   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2545   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2546   if (d) {
2547     PetscInt rstart;
2548     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2549     *d += rstart;
2550 
2551   }
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2556 {
2557   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 /* -------------------------------------------------------------------*/
2566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2567                                        MatGetRow_MPIAIJ,
2568                                        MatRestoreRow_MPIAIJ,
2569                                        MatMult_MPIAIJ,
2570                                 /* 4*/ MatMultAdd_MPIAIJ,
2571                                        MatMultTranspose_MPIAIJ,
2572                                        MatMultTransposeAdd_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                        0,
2576                                 /*10*/ 0,
2577                                        0,
2578                                        0,
2579                                        MatSOR_MPIAIJ,
2580                                        MatTranspose_MPIAIJ,
2581                                 /*15*/ MatGetInfo_MPIAIJ,
2582                                        MatEqual_MPIAIJ,
2583                                        MatGetDiagonal_MPIAIJ,
2584                                        MatDiagonalScale_MPIAIJ,
2585                                        MatNorm_MPIAIJ,
2586                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2587                                        MatAssemblyEnd_MPIAIJ,
2588                                        MatSetOption_MPIAIJ,
2589                                        MatZeroEntries_MPIAIJ,
2590                                 /*24*/ MatZeroRows_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*29*/ MatSetUp_MPIAIJ,
2596                                        0,
2597                                        0,
2598                                        MatGetDiagonalBlock_MPIAIJ,
2599                                        0,
2600                                 /*34*/ MatDuplicate_MPIAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*39*/ MatAXPY_MPIAIJ,
2606                                        MatCreateSubMatrices_MPIAIJ,
2607                                        MatIncreaseOverlap_MPIAIJ,
2608                                        MatGetValues_MPIAIJ,
2609                                        MatCopy_MPIAIJ,
2610                                 /*44*/ MatGetRowMax_MPIAIJ,
2611                                        MatScale_MPIAIJ,
2612                                        MatShift_MPIAIJ,
2613                                        MatDiagonalSet_MPIAIJ,
2614                                        MatZeroRowsColumns_MPIAIJ,
2615                                 /*49*/ MatSetRandom_MPIAIJ,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2621                                        0,
2622                                        MatSetUnfactored_MPIAIJ,
2623                                        MatPermute_MPIAIJ,
2624                                        0,
2625                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2626                                        MatDestroy_MPIAIJ,
2627                                        MatView_MPIAIJ,
2628                                        0,
2629                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2630                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2631                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2636                                        MatGetRowMinAbs_MPIAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*75*/ MatFDColoringApply_AIJ,
2642                                        MatSetFromOptions_MPIAIJ,
2643                                        0,
2644                                        0,
2645                                        MatFindZeroDiagonals_MPIAIJ,
2646                                 /*80*/ 0,
2647                                        0,
2648                                        0,
2649                                 /*83*/ MatLoad_MPIAIJ,
2650                                        MatIsSymmetric_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2656                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2657                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2658                                        MatPtAP_MPIAIJ_MPIAIJ,
2659                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2660                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*99*/ 0,
2666                                        0,
2667                                        0,
2668                                        MatConjugate_MPIAIJ,
2669                                        0,
2670                                 /*104*/MatSetValuesRow_MPIAIJ,
2671                                        MatRealPart_MPIAIJ,
2672                                        MatImaginaryPart_MPIAIJ,
2673                                        0,
2674                                        0,
2675                                 /*109*/0,
2676                                        0,
2677                                        MatGetRowMin_MPIAIJ,
2678                                        0,
2679                                        MatMissingDiagonal_MPIAIJ,
2680                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2681                                        0,
2682                                        MatGetGhosts_MPIAIJ,
2683                                        0,
2684                                        0,
2685                                 /*119*/0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        MatGetMultiProcBlock_MPIAIJ,
2690                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2691                                        MatGetColumnNorms_MPIAIJ,
2692                                        MatInvertBlockDiagonal_MPIAIJ,
2693                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2694                                        MatCreateSubMatricesMPI_MPIAIJ,
2695                                 /*129*/0,
2696                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2697                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2698                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2699                                        0,
2700                                 /*134*/0,
2701                                        0,
2702                                        MatRARt_MPIAIJ_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                 /*139*/MatSetBlockSizes_MPIAIJ,
2706                                        0,
2707                                        0,
2708                                        MatFDColoringSetUp_MPIXAIJ,
2709                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2710                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2711 };
2712 
2713 /* ----------------------------------------------------------------------------------------*/
2714 
2715 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2716 {
2717   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2722   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
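
/* A minimal sketch of the store/retrieve pair above (assumes mat is assembled and that
   MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE) has been called, which MatStoreValues() requires):

     ierr = MatStoreValues(mat);CHKERRQ(ierr);       save the current numerical values
     ... overwrite the values, e.g. while rebuilding a Jacobian ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);    restore the saved values
*/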
2736 
2737 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2738 {
2739   Mat_MPIAIJ     *b;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2744   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2745   b = (Mat_MPIAIJ*)B->data;
2746 
2747 #if defined(PETSC_USE_CTABLE)
2748   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2749 #else
2750   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2751 #endif
2752   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2753   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2754   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2755 
2756   /* Because B will have been resized we simply destroy it and create a new one each time */
2757   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2758   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2759   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2760   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2761   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2762   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763 
2764   if (!B->preallocated) {
2765     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2766     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2767     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2768     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2769     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2770   }
2771 
2772   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2773   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2774   B->preallocated  = PETSC_TRUE;
2775   B->was_assembled = PETSC_FALSE;
2776   B->assembled     = PETSC_FALSE;
2777   PetscFunctionReturn(0);
2778 }
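
/* A minimal preallocation sketch using the public interface that reaches the routine above
   (n is a placeholder global size; assumes at most 5 nonzeros per row in the diagonal block
   and at most 2 in the off-diagonal block):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/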
2779 
2780 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2781 {
2782   Mat_MPIAIJ     *b;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2787   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2788   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2789   b = (Mat_MPIAIJ*)B->data;
2790 
2791 #if defined(PETSC_USE_CTABLE)
2792   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2793 #else
2794   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2795 #endif
2796   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2797   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2798   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2799 
2800   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2801   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2802   B->preallocated  = PETSC_TRUE;
2803   B->was_assembled = PETSC_FALSE;
2804   B->assembled = PETSC_FALSE;
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2809 {
2810   Mat            mat;
2811   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   *newmat = 0;
2816   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2817   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2818   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2819   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2820   a       = (Mat_MPIAIJ*)mat->data;
2821 
2822   mat->factortype   = matin->factortype;
2823   mat->assembled    = PETSC_TRUE;
2824   mat->insertmode   = NOT_SET_VALUES;
2825   mat->preallocated = PETSC_TRUE;
2826 
2827   a->size         = oldmat->size;
2828   a->rank         = oldmat->rank;
2829   a->donotstash   = oldmat->donotstash;
2830   a->roworiented  = oldmat->roworiented;
2831   a->rowindices   = 0;
2832   a->rowvalues    = 0;
2833   a->getrowactive = PETSC_FALSE;
2834 
2835   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2836   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2837 
2838   if (oldmat->colmap) {
2839 #if defined(PETSC_USE_CTABLE)
2840     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2841 #else
2842     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2843     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2844     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845 #endif
2846   } else a->colmap = 0;
2847   if (oldmat->garray) {
2848     PetscInt len;
2849     len  = oldmat->B->cmap->n;
2850     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2851     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2852     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2853   } else a->garray = 0;
2854 
2855   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2856   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2857   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2858   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2859 
2860   if (oldmat->Mvctx_mpi1) {
2861     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2862     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2863   }
2864 
2865   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2866   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2867   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2868   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2869   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2870   *newmat = mat;
2871   PetscFunctionReturn(0);
2872 }
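
/* A minimal usage sketch (assumes A is an assembled MATMPIAIJ matrix):

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);    B gets A's layout, nonzero pattern and values
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/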
2873 
2874 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2875 {
2876   PetscBool      isbinary, ishdf5;
2877   PetscErrorCode ierr;
2878 
2879   PetscFunctionBegin;
2880   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2881   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2882   /* force binary viewer to load .info file if it has not yet done so */
2883   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2884   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2885   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2886   if (isbinary) {
2887     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2888   } else if (ishdf5) {
2889 #if defined(PETSC_HAVE_HDF5)
2890     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2891 #else
2892     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2893 #endif
2894   } else {
2895     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2896   }
2897   PetscFunctionReturn(0);
2898 }
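
/* A minimal usage sketch (assumes "matrix.dat" was previously written with MatView() on a binary viewer):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/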
2899 
2900 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2901 {
2902   PetscScalar    *vals,*svals;
2903   MPI_Comm       comm;
2904   PetscErrorCode ierr;
2905   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2906   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2907   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2908   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2909   PetscInt       cend,cstart,n,*rowners;
2910   int            fd;
2911   PetscInt       bs = newMat->rmap->bs;
2912 
2913   PetscFunctionBegin;
2914   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2916   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2917   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2918   if (!rank) {
2919     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2920     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2921     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2922   }
2923 
2924   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2925   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2926   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2927   if (bs < 0) bs = 1;
2928 
2929   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2930   M    = header[1]; N = header[2];
2931 
2932   /* If global sizes are set, check if they are consistent with that given in the file */
2933   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2934   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2935 
2936   /* determine ownership of all (block) rows */
2937   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2938   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2939   else m = newMat->rmap->n; /* Set by user */
2940 
2941   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2942   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2943 
2944   /* First process needs enough room for process with most rows */
2945   if (!rank) {
2946     mmax = rowners[1];
2947     for (i=2; i<=size; i++) {
2948       mmax = PetscMax(mmax, rowners[i]);
2949     }
2950   } else mmax = -1;             /* unused, but compilers complain */
2951 
2952   rowners[0] = 0;
2953   for (i=2; i<=size; i++) {
2954     rowners[i] += rowners[i-1];
2955   }
2956   rstart = rowners[rank];
2957   rend   = rowners[rank+1];
2958 
2959   /* distribute row lengths to all processors */
2960   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2961   if (!rank) {
2962     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2963     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2964     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2965     for (j=0; j<m; j++) {
2966       procsnz[0] += ourlens[j];
2967     }
2968     for (i=1; i<size; i++) {
2969       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2970       /* calculate the number of nonzeros on each processor */
2971       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2972         procsnz[i] += rowlengths[j];
2973       }
2974       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2975     }
2976     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2977   } else {
2978     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2979   }
2980 
2981   if (!rank) {
2982     /* determine max buffer needed and allocate it */
2983     maxnz = 0;
2984     for (i=0; i<size; i++) {
2985       maxnz = PetscMax(maxnz,procsnz[i]);
2986     }
2987     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2988 
2989     /* read in my part of the matrix column indices  */
2990     nz   = procsnz[0];
2991     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2992     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2993 
2994     /* read in everyone else's parts and ship them off */
2995     for (i=1; i<size; i++) {
2996       nz   = procsnz[i];
2997       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2998       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2999     }
3000     ierr = PetscFree(cols);CHKERRQ(ierr);
3001   } else {
3002     /* determine buffer space needed for message */
3003     nz = 0;
3004     for (i=0; i<m; i++) {
3005       nz += ourlens[i];
3006     }
3007     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3008 
3009     /* receive message of column indices */
3010     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3011   }
3012 
3013   /* determine column ownership if matrix is not square */
3014   if (N != M) {
3015     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3016     else n = newMat->cmap->n;
3017     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3018     cstart = cend - n;
3019   } else {
3020     cstart = rstart;
3021     cend   = rend;
3022     n      = cend - cstart;
3023   }
3024 
3025   /* loop over local rows, determining number of off-diagonal entries */
3026   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3027   jj   = 0;
3028   for (i=0; i<m; i++) {
3029     for (j=0; j<ourlens[i]; j++) {
3030       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3031       jj++;
3032     }
3033   }
3034 
3035   for (i=0; i<m; i++) {
3036     ourlens[i] -= offlens[i];
3037   }
3038   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3039 
3040   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3041 
3042   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3043 
3044   for (i=0; i<m; i++) {
3045     ourlens[i] += offlens[i];
3046   }
3047 
3048   if (!rank) {
3049     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3050 
3051     /* read in my part of the matrix numerical values  */
3052     nz   = procsnz[0];
3053     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3054 
3055     /* insert into matrix */
3056     jj      = rstart;
3057     smycols = mycols;
3058     svals   = vals;
3059     for (i=0; i<m; i++) {
3060       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3061       smycols += ourlens[i];
3062       svals   += ourlens[i];
3063       jj++;
3064     }
3065 
3066     /* read in other processors and ship out */
3067     for (i=1; i<size; i++) {
3068       nz   = procsnz[i];
3069       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3070       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3071     }
3072     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3073   } else {
3074     /* receive numeric values */
3075     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3076 
3077     /* receive message of values */
3078     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3079 
3080     /* insert into matrix */
3081     jj      = rstart;
3082     smycols = mycols;
3083     svals   = vals;
3084     for (i=0; i<m; i++) {
3085       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3086       smycols += ourlens[i];
3087       svals   += ourlens[i];
3088       jj++;
3089     }
3090   }
3091   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3092   ierr = PetscFree(vals);CHKERRQ(ierr);
3093   ierr = PetscFree(mycols);CHKERRQ(ierr);
3094   ierr = PetscFree(rowners);CHKERRQ(ierr);
3095   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3096   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 /* Not scalable because of ISAllGather() unless getting all columns. */
3101 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3102 {
3103   PetscErrorCode ierr;
3104   IS             iscol_local;
3105   PetscBool      isstride;
3106   PetscMPIInt    lisstride=0,gisstride;
3107 
3108   PetscFunctionBegin;
3109   /* check if we are grabbing all columns */
3110   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3111 
3112   if (isstride) {
3113     PetscInt  start,len,mstart,mlen;
3114     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3115     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3116     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3117     if (mstart == start && mlen-mstart == len) lisstride = 1;
3118   }
3119 
3120   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3121   if (gisstride) {
3122     PetscInt N;
3123     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3124     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3125     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3126     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3127   } else {
3128     PetscInt cbs;
3129     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3130     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3131     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3132   }
3133 
3134   *isseq = iscol_local;
3135   PetscFunctionReturn(0);
3136 }
3137 
3138 /*
3139  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3140  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3141 
3142  Input Parameters:
3143    mat - matrix
3144    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3145            i.e., mat->rstart <= isrow[i] < mat->rend
3146    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3147            i.e., mat->cstart <= iscol[i] < mat->cend
3148  Output Parameters:
3149    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3150    iscol_o - sequential column index set for retrieving mat->B
3151    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
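   For example (hypothetical values): if iscol_o selects local columns {1,4} of mat->B and those
   columns correspond to entries 7 and 12 of iscol, then garray = {7,12}.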
3152  */
3153 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3154 {
3155   PetscErrorCode ierr;
3156   Vec            x,cmap;
3157   const PetscInt *is_idx;
3158   PetscScalar    *xarray,*cmaparray;
3159   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3160   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3161   Mat            B=a->B;
3162   Vec            lvec=a->lvec,lcmap;
3163   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3164   MPI_Comm       comm;
3165   VecScatter     Mvctx=a->Mvctx;
3166 
3167   PetscFunctionBegin;
3168   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3169   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3170 
3171   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3172   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3173   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3174   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3175   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3176 
3177   /* Get start indices */
3178   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3179   isstart -= ncols;
3180   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3181 
3182   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3183   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3184   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3185   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3186   for (i=0; i<ncols; i++) {
3187     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3188     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3189     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3190   }
3191   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3192   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3193   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3194 
3195   /* Get iscol_d */
3196   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3197   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3198   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3199 
3200   /* Get isrow_d */
3201   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3202   rstart = mat->rmap->rstart;
3203   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3204   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3205   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3206   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3207 
3208   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3209   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3210   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3211 
3212   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3213   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3214   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3215 
3216   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3217 
3218   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3219   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3220 
3221   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3222   /* off-process column indices */
3223   count = 0;
3224   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3225   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3226 
3227   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3228   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3229   for (i=0; i<Bn; i++) {
3230     if (PetscRealPart(xarray[i]) > -1.0) {
3231       idx[count]     = i;                   /* local column index in off-diagonal part B */
3232       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3233       count++;
3234     }
3235   }
3236   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3237   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3238 
3239   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3240   /* cannot ensure iscol_o has same blocksize as iscol! */
3241 
3242   ierr = PetscFree(idx);CHKERRQ(ierr);
3243   *garray = cmap1;
3244 
3245   ierr = VecDestroy(&x);CHKERRQ(ierr);
3246   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3247   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3252 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3253 {
3254   PetscErrorCode ierr;
3255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3256   Mat            M = NULL;
3257   MPI_Comm       comm;
3258   IS             iscol_d,isrow_d,iscol_o;
3259   Mat            Asub = NULL,Bsub = NULL;
3260   PetscInt       n;
3261 
3262   PetscFunctionBegin;
3263   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3264 
3265   if (call == MAT_REUSE_MATRIX) {
3266     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3267     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3268     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3269 
3270     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3271     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3272 
3273     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3274     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3275 
3276     /* Update diagonal and off-diagonal portions of submat */
3277     asub = (Mat_MPIAIJ*)(*submat)->data;
3278     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3279     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3280     if (n) {
3281       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3282     }
3283     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285 
3286   } else { /* call == MAT_INITIAL_MATRIX */
3287     const PetscInt *garray;
3288     PetscInt        BsubN;
3289 
3290     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3291     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3292 
3293     /* Create local submatrices Asub and Bsub */
3294     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3295     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3296 
3297     /* Create submatrix M */
3298     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3299 
3300     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3301     asub = (Mat_MPIAIJ*)M->data;
3302 
3303     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3304     n = asub->B->cmap->N;
3305     if (BsubN > n) {
3306       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3307       const PetscInt *idx;
3308       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3309       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3310 
3311       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3312       j = 0;
3313       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3314       for (i=0; i<n; i++) {
3315         if (j >= BsubN) break;
3316         while (subgarray[i] > garray[j]) j++;
3317 
3318         if (subgarray[i] == garray[j]) {
3319           idx_new[i] = idx[j++];
3320         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3321       }
3322       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3323 
3324       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3325       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3326 
3327     } else if (BsubN < n) {
3328       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3329     }
3330 
3331     ierr = PetscFree(garray);CHKERRQ(ierr);
3332     *submat = M;
3333 
3334     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3335     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3336     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3337 
3338     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3339     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3340 
3341     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3342     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3343   }
3344   PetscFunctionReturn(0);
3345 }
3346 
3347 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3348 {
3349   PetscErrorCode ierr;
3350   IS             iscol_local=NULL,isrow_d;
3351   PetscInt       csize;
3352   PetscInt       n,i,j,start,end;
3353   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3354   MPI_Comm       comm;
3355 
3356   PetscFunctionBegin;
3357   /* If isrow has same processor distribution as mat,
3358      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3359   if (call == MAT_REUSE_MATRIX) {
3360     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3361     if (isrow_d) {
3362       sameRowDist  = PETSC_TRUE;
3363       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3364     } else {
3365       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3366       if (iscol_local) {
3367         sameRowDist  = PETSC_TRUE;
3368         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3369       }
3370     }
3371   } else {
3372     /* Check if isrow has same processor distribution as mat */
3373     sameDist[0] = PETSC_FALSE;
3374     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3375     if (!n) {
3376       sameDist[0] = PETSC_TRUE;
3377     } else {
3378       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3379       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3380       if (i >= start && j < end) {
3381         sameDist[0] = PETSC_TRUE;
3382       }
3383     }
3384 
3385     /* Check if iscol has same processor distribution as mat */
3386     sameDist[1] = PETSC_FALSE;
3387     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3388     if (!n) {
3389       sameDist[1] = PETSC_TRUE;
3390     } else {
3391       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3392       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3393       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3394     }
3395 
3396     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3397     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3398     sameRowDist = tsameDist[0];
3399   }
3400 
3401   if (sameRowDist) {
3402     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3403       /* isrow and iscol have same processor distribution as mat */
3404       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3405       PetscFunctionReturn(0);
3406     } else { /* sameRowDist */
3407       /* isrow has same processor distribution as mat */
3408       if (call == MAT_INITIAL_MATRIX) {
3409         PetscBool sorted;
3410         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3411         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3412         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3413         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3414 
3415         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3416         if (sorted) {
3417           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3418           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3419           PetscFunctionReturn(0);
3420         }
3421       } else { /* call == MAT_REUSE_MATRIX */
3422         IS    iscol_sub;
3423         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3424         if (iscol_sub) {
3425           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3426           PetscFunctionReturn(0);
3427         }
3428       }
3429     }
3430   }
3431 
3432   /* General case: iscol -> iscol_local which has global size of iscol */
3433   if (call == MAT_REUSE_MATRIX) {
3434     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3435     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3436   } else {
3437     if (!iscol_local) {
3438       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3439     }
3440   }
3441 
3442   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3443   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3444 
3445   if (call == MAT_INITIAL_MATRIX) {
3446     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3447     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3448   }
3449   PetscFunctionReturn(0);
3450 }
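
/* A minimal usage sketch of the public interface that dispatches to the routine above (assumes A is an
   assembled MATMPIAIJ matrix and isrow/iscol are parallel index sets selecting the wanted rows and columns):

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change values in A, keeping its nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/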
3451 
3452 /*@C
3453      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3454          and "off-diagonal" part of the matrix in CSR format.
3455 
3456    Collective on MPI_Comm
3457 
3458    Input Parameters:
3459 +  comm - MPI communicator
3460 .  A - "diagonal" portion of matrix
3461 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3462 -  garray - global index of B columns
3463 
3464    Output Parameter:
3465 .   mat - the matrix, with input A as its local diagonal matrix
3466    Level: advanced
3467 
3468    Notes:
3469        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3470        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3471 
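   Example Usage:
   A minimal sketch; Ad and Ao are placeholders for sequential AIJ matrices already holding this process's
   diagonal and off-diagonal blocks, and g is the global column map for Ao's columns:
.vb
   Mat C;
   ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,g,&C);CHKERRQ(ierr);
.ve
   Afterwards Ad is owned by C and Ao has been destroyed, so only C may be used.
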
3472 .seealso: MatCreateMPIAIJWithSplitArrays()
3473 @*/
3474 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3475 {
3476   PetscErrorCode ierr;
3477   Mat_MPIAIJ     *maij;
3478   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3479   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3480   PetscScalar    *oa=b->a;
3481   Mat            Bnew;
3482   PetscInt       m,n,N;
3483 
3484   PetscFunctionBegin;
3485   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3486   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3487   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3488   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3489   /* The check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3490   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3491 
3492   /* Get global columns of mat */
3493   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3494 
3495   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3496   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3497   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3498   maij = (Mat_MPIAIJ*)(*mat)->data;
3499 
3500   (*mat)->preallocated = PETSC_TRUE;
3501 
3502   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3503   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3504 
3505   /* Set A as diagonal portion of *mat */
3506   maij->A = A;
3507 
3508   nz = oi[m];
3509   for (i=0; i<nz; i++) {
3510     col   = oj[i];
3511     oj[i] = garray[col];
3512   }
3513 
3514    /* Set Bnew as off-diagonal portion of *mat */
3515   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3516   bnew        = (Mat_SeqAIJ*)Bnew->data;
3517   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3518   maij->B     = Bnew;
3519 
3520   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3521 
3522   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3523   b->free_a       = PETSC_FALSE;
3524   b->free_ij      = PETSC_FALSE;
3525   ierr = MatDestroy(&B);CHKERRQ(ierr);
3526 
3527   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3528   bnew->free_a       = PETSC_TRUE;
3529   bnew->free_ij      = PETSC_TRUE;
3530 
3531   /* condense columns of maij->B */
3532   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3533   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3535   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3536   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3541 
3542 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3543 {
3544   PetscErrorCode ierr;
3545   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3546   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3547   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3548   Mat            M,Msub,B=a->B;
3549   MatScalar      *aa;
3550   Mat_SeqAIJ     *aij;
3551   PetscInt       *garray = a->garray,*colsub,Ncols;
3552   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3553   IS             iscol_sub,iscmap;
3554   const PetscInt *is_idx,*cmap;
3555   PetscBool      allcolumns=PETSC_FALSE;
3556   MPI_Comm       comm;
3557 
3558   PetscFunctionBegin;
3559   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3560 
3561   if (call == MAT_REUSE_MATRIX) {
3562     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3563     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3564     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3565 
3566     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3567     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3568 
3569     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3570     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3571 
3572     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3573 
3574   } else { /* call == MAT_INITIAL_MATRIX */
3575     PetscBool flg;
3576 
3577     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3578     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3579 
3580     /* (1) iscol -> nonscalable iscol_local */
3581     /* Check for special case: each processor gets entire matrix columns */
3582     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3583     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3584     if (allcolumns) {
3585       iscol_sub = iscol_local;
3586       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3587       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3588 
3589     } else {
3590       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3591       PetscInt *idx,*cmap1,k;
3592       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3593       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3594       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3595       count = 0;
3596       k     = 0;
3597       for (i=0; i<Ncols; i++) {
3598         j = is_idx[i];
3599         if (j >= cstart && j < cend) {
3600           /* diagonal part of mat */
3601           idx[count]     = j;
3602           cmap1[count++] = i; /* column index in submat */
3603         } else if (Bn) {
3604           /* off-diagonal part of mat */
3605           if (j == garray[k]) {
3606             idx[count]     = j;
3607             cmap1[count++] = i;  /* column index in submat */
3608           } else if (j > garray[k]) {
3609             while (j > garray[k] && k < Bn-1) k++;
3610             if (j == garray[k]) {
3611               idx[count]     = j;
3612               cmap1[count++] = i; /* column index in submat */
3613             }
3614           }
3615         }
3616       }
3617       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3618 
3619       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3620       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3621       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3622 
3623       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3624     }
3625 
3626     /* (3) Create sequential Msub */
3627     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3628   }
3629 
3630   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3631   aij  = (Mat_SeqAIJ*)(Msub)->data;
3632   ii   = aij->i;
3633   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3634 
3635   /*
3636       m - number of local rows
3637       Ncols - number of columns (same on all processors)
3638       rstart - first row in new global matrix generated
3639   */
3640   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3641 
3642   if (call == MAT_INITIAL_MATRIX) {
3643     /* (4) Create parallel newmat */
3644     PetscMPIInt    rank,size;
3645     PetscInt       csize;
3646 
3647     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3648     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3649 
3650     /*
3651         Determine the number of non-zeros in the diagonal and off-diagonal
3652         portions of the matrix in order to do correct preallocation
3653     */
3654 
3655     /* first get start and end of "diagonal" columns */
3656     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3657     if (csize == PETSC_DECIDE) {
3658       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3659       if (mglobal == Ncols) { /* square matrix */
3660         nlocal = m;
3661       } else {
3662         nlocal = Ncols/size + ((Ncols % size) > rank);
3663       }
3664     } else {
3665       nlocal = csize;
3666     }
3667     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3668     rstart = rend - nlocal;
3669     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3670 
3671     /* next, compute all the lengths */
3672     jj    = aij->j;
3673     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3674     olens = dlens + m;
3675     for (i=0; i<m; i++) {
3676       jend = ii[i+1] - ii[i];
3677       olen = 0;
3678       dlen = 0;
3679       for (j=0; j<jend; j++) {
3680         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3681         else dlen++;
3682         jj++;
3683       }
3684       olens[i] = olen;
3685       dlens[i] = dlen;
3686     }
3687 
3688     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3689     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3690 
3691     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3692     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3693     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3694     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3695     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3696     ierr = PetscFree(dlens);CHKERRQ(ierr);
3697 
3698   } else { /* call == MAT_REUSE_MATRIX */
3699     M    = *newmat;
3700     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3701     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3702     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3703     /*
3704          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3705        rather than the slower MatSetValues().
3706     */
3707     M->was_assembled = PETSC_TRUE;
3708     M->assembled     = PETSC_FALSE;
3709   }
3710 
3711   /* (5) Set values of Msub to *newmat */
3712   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3713   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3714 
3715   jj   = aij->j;
3716   aa   = aij->a;
3717   for (i=0; i<m; i++) {
3718     row = rstart + i;
3719     nz  = ii[i+1] - ii[i];
3720     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3721     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3722     jj += nz; aa += nz;
3723   }
3724   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3725 
3726   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3728 
3729   ierr = PetscFree(colsub);CHKERRQ(ierr);
3730 
3731   /* save Msub, iscol_sub and iscmap used in processor for next request */
3732   if (call ==  MAT_INITIAL_MATRIX) {
3733     *newmat = M;
3734     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3735     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3736 
3737     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3738     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3739 
3740     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3741     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3742 
3743     if (iscol_local) {
3744       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3745       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3746     }
3747   }
3748   PetscFunctionReturn(0);
3749 }
3750 
3751 /*
3752     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3753   process, and then the final result built by concatenating those local matrices.
3754   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3755 
3756   Note: This requires a sequential iscol with all indices.
3757 */
3758 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3759 {
3760   PetscErrorCode ierr;
3761   PetscMPIInt    rank,size;
3762   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3763   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3764   Mat            M,Mreuse;
3765   MatScalar      *aa,*vwork;
3766   MPI_Comm       comm;
3767   Mat_SeqAIJ     *aij;
3768   PetscBool      colflag,allcolumns=PETSC_FALSE;
3769 
3770   PetscFunctionBegin;
3771   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3772   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3773   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3774 
3775   /* Check for special case: each processor gets entire matrix columns */
3776   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3777   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3778   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3779 
3780   if (call ==  MAT_REUSE_MATRIX) {
3781     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3782     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3783     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3784   } else {
3785     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3786   }
3787 
3788   /*
3789       m - number of local rows
3790       n - number of columns (same on all processors)
3791       rstart - first row in new global matrix generated
3792   */
3793   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3794   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3795   if (call == MAT_INITIAL_MATRIX) {
3796     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3797     ii  = aij->i;
3798     jj  = aij->j;
3799 
3800     /*
3801         Determine the number of non-zeros in the diagonal and off-diagonal
3802         portions of the matrix in order to do correct preallocation
3803     */
3804 
3805     /* first get start and end of "diagonal" columns */
3806     if (csize == PETSC_DECIDE) {
3807       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3808       if (mglobal == n) { /* square matrix */
3809         nlocal = m;
3810       } else {
3811         nlocal = n/size + ((n % size) > rank);
3812       }
3813     } else {
3814       nlocal = csize;
3815     }
3816     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3817     rstart = rend - nlocal;
3818     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3819 
3820     /* next, compute all the lengths */
3821     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3822     olens = dlens + m;
3823     for (i=0; i<m; i++) {
3824       jend = ii[i+1] - ii[i];
3825       olen = 0;
3826       dlen = 0;
3827       for (j=0; j<jend; j++) {
3828         if (*jj < rstart || *jj >= rend) olen++;
3829         else dlen++;
3830         jj++;
3831       }
3832       olens[i] = olen;
3833       dlens[i] = dlen;
3834     }
3835     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3836     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3837     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3838     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3839     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3840     ierr = PetscFree(dlens);CHKERRQ(ierr);
3841   } else {
3842     PetscInt ml,nl;
3843 
3844     M    = *newmat;
3845     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3846     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3847     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3848     /*
3849          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3850        rather than the slower MatSetValues().
3851     */
3852     M->was_assembled = PETSC_TRUE;
3853     M->assembled     = PETSC_FALSE;
3854   }
3855   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3856   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3857   ii   = aij->i;
3858   jj   = aij->j;
3859   aa   = aij->a;
3860   for (i=0; i<m; i++) {
3861     row   = rstart + i;
3862     nz    = ii[i+1] - ii[i];
3863     cwork = jj;     jj += nz;
3864     vwork = aa;     aa += nz;
3865     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3866   }
3867 
3868   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3869   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3870   *newmat = M;
3871 
3872   /* save submatrix used in processor for next request */
3873   if (call ==  MAT_INITIAL_MATRIX) {
3874     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3875     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3876   }
3877   PetscFunctionReturn(0);
3878 }
3879 
3880 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3881 {
3882   PetscInt       m,cstart, cend,j,nnz,i,d;
3883   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3884   const PetscInt *JJ;
3885   PetscScalar    *values;
3886   PetscErrorCode ierr;
3887   PetscBool      nooffprocentries;
3888 
3889   PetscFunctionBegin;
3890   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3891 
3892   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3893   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3894   m      = B->rmap->n;
3895   cstart = B->cmap->rstart;
3896   cend   = B->cmap->rend;
3897   rstart = B->rmap->rstart;
3898 
3899   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3900 
3901 #if defined(PETSC_USE_DEBUG)
3902   for (i=0; i<m && Ii; i++) {
3903     nnz = Ii[i+1]- Ii[i];
3904     JJ  = J + Ii[i];
3905     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3906     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3907     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3908   }
3909 #endif
3910 
3911   for (i=0; i<m && Ii; i++) {
3912     nnz     = Ii[i+1]- Ii[i];
3913     JJ      = J + Ii[i];
3914     nnz_max = PetscMax(nnz_max,nnz);
3915     d       = 0;
3916     for (j=0; j<nnz; j++) {
3917       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3918     }
3919     d_nnz[i] = d;
3920     o_nnz[i] = nnz - d;
3921   }
3922   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3923   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3924 
3925   if (v) values = (PetscScalar*)v;
3926   else {
3927     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3928   }
3929 
3930   for (i=0; i<m && Ii; i++) {
3931     ii   = i + rstart;
3932     nnz  = Ii[i+1]- Ii[i];
3933     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3934   }
3935   nooffprocentries    = B->nooffprocentries;
3936   B->nooffprocentries = PETSC_TRUE;
3937   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3938   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3939   B->nooffprocentries = nooffprocentries;
3940 
3941   if (!v) {
3942     ierr = PetscFree(values);CHKERRQ(ierr);
3943   }
3944   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3945   PetscFunctionReturn(0);
3946 }
3947 
3948 /*@
3949    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3950    (the default parallel PETSc format).
3951 
3952    Collective on MPI_Comm
3953 
3954    Input Parameters:
3955 +  B - the matrix
3956 .  i - the indices into j for the start of each local row (starts with zero)
3957 .  j - the column indices for each local row (starts with zero)
3958 -  v - optional values in the matrix
3959 
3960    Level: developer
3961 
3962    Notes:
3963        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3964      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3965      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3966 
3967        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3968 
3969        The format used for the sparse matrix input is equivalent to a
3970     row-major ordering, i.e., for the following matrix, the input data expected is
3971     as shown below.
3972 
3973 $        1 0 0
3974 $        2 0 3     P0
3975 $       -------
3976 $        4 5 6     P1
3977 $
3978 $     Process0 [P0]: rows_owned=[0,1]
3979 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3980 $        j =  {0,0,2}  [size = 3]
3981 $        v =  {1,2,3}  [size = 3]
3982 $
3983 $     Process1 [P1]: rows_owned=[2]
3984 $        i =  {0,3}    [size = nrow+1  = 1+1]
3985 $        j =  {0,1,2}  [size = 3]
3986 $        v =  {4,5,6}  [size = 3]
3987 
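       A hypothetical usage sketch for process 0 of the example above (A and comm are placeholder
    names, error checking is omitted); process 1 would pass its own i, j, and v analogously:

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreate(comm,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
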
3988 .keywords: matrix, aij, compressed row, sparse, parallel
3989 
3990 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3991           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3992 @*/
3993 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3994 {
3995   PetscErrorCode ierr;
3996 
3997   PetscFunctionBegin;
3998   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3999   PetscFunctionReturn(0);
4000 }
4001 
4002 /*@C
4003    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4004    (the default parallel PETSc format).  For good matrix assembly performance
4005    the user should preallocate the matrix storage by setting the parameters
4006    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4007    performance can be increased by more than a factor of 50.
4008 
4009    Collective on MPI_Comm
4010 
4011    Input Parameters:
4012 +  B - the matrix
4013 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4014            (same value is used for all local rows)
4015 .  d_nnz - array containing the number of nonzeros in the various rows of the
4016            DIAGONAL portion of the local submatrix (possibly different for each row)
4017            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4018            The size of this array is equal to the number of local rows, i.e 'm'.
4019            For matrices that will be factored, you must leave room for (and set)
4020            the diagonal entry even if it is zero.
4021 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4022            submatrix (same value is used for all local rows).
4023 -  o_nnz - array containing the number of nonzeros in the various rows of the
4024            OFF-DIAGONAL portion of the local submatrix (possibly different for
4025            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4026            structure. The size of this array is equal to the number
4027            of local rows, i.e 'm'.
4028 
4029    If the *_nnz parameter is given then the *_nz parameter is ignored
4030 
4031    The AIJ format (also called the Yale sparse matrix format or
4032    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4033    storage.  The stored row and column indices begin with zero.
4034    See Users-Manual: ch_mat for details.
4035 
4036    The parallel matrix is partitioned such that the first m0 rows belong to
4037    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4038    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
4039 
4040    The DIAGONAL portion of the local submatrix of a processor can be defined
4041    as the submatrix which is obtained by extracting the part corresponding to
4042    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4043    first row that belongs to the processor, r2 is the last row belonging to
4044    this processor, and c1-c2 is the range of indices of the local part of a
4045    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4046    common case of a square matrix, the row and column ranges are the same and
4047    the DIAGONAL part is also square. The remaining portion of the local
4048    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4049 
4050    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4051 
4052    You can call MatGetInfo() to get information on how effective the preallocation was;
4053    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4054    You can also run with the option -info and look for messages with the string
4055    malloc in them to see if additional memory allocation was needed.
4056 
4057    Example usage:
4058 
4059    Consider the following 8x8 matrix with 34 non-zero values, that is
4060    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4061    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4062    as follows:
4063 
4064 .vb
4065             1  2  0  |  0  3  0  |  0  4
4066     Proc0   0  5  6  |  7  0  0  |  8  0
4067             9  0 10  | 11  0  0  | 12  0
4068     -------------------------------------
4069            13  0 14  | 15 16 17  |  0  0
4070     Proc1   0 18  0  | 19 20 21  |  0  0
4071             0  0  0  | 22 23  0  | 24  0
4072     -------------------------------------
4073     Proc2  25 26 27  |  0  0 28  | 29  0
4074            30  0  0  | 31 32 33  |  0 34
4075 .ve
4076 
4077    This can be represented as a collection of submatrices as:
4078 
4079 .vb
4080       A B C
4081       D E F
4082       G H I
4083 .ve
4084 
4085    Where the submatrices A,B,C are owned by proc0, D,E,F are
4086    owned by proc1, G,H,I are owned by proc2.
4087 
4088    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4090    The 'M','N' parameters are 8,8, and have the same values on all procs.
4091 
4092    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4093    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4094    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4095    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4096    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4097    matrix, and [DF] as another SeqAIJ matrix.
4098 
4099    When d_nz, o_nz parameters are specified, d_nz storage elements are
4100    allocated for every row of the local diagonal submatrix, and o_nz
4101    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4102    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4103    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4104    In this case, the values of d_nz,o_nz are:
4105 .vb
4106      proc0 : dnz = 2, o_nz = 2
4107      proc1 : dnz = 3, o_nz = 2
4108      proc2 : dnz = 1, o_nz = 4
4109 .ve
4110    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4111    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4112    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4113    34 values.
4114 
4115    When d_nnz, o_nnz parameters are specified, the storage is specified
4116    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4117    In the above case the values for d_nnz,o_nnz are:
4118 .vb
4119      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4120      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4121      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4122 .ve
4123    Here the space allocated is the sum of all the above values, i.e., 34, and
4124    hence the preallocation is perfect.
4125 
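   As a minimal sketch (A and comm are placeholder names; error checking omitted), proc0 of the
   example above could preallocate its three local rows with the d_nnz/o_nnz values listed:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
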
4126    Level: intermediate
4127 
4128 .keywords: matrix, aij, compressed row, sparse, parallel
4129 
4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4131           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4132 @*/
4133 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4134 {
4135   PetscErrorCode ierr;
4136 
4137   PetscFunctionBegin;
4138   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4139   PetscValidType(B,1);
4140   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4141   PetscFunctionReturn(0);
4142 }
4143 
4144 /*@
4145      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4146          in standard CSR format.
4147 
4148    Collective on MPI_Comm
4149 
4150    Input Parameters:
4151 +  comm - MPI communicator
4152 .  m - number of local rows (Cannot be PETSC_DECIDE)
4153 .  n - This value should be the same as the local size used in creating the
4154        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4155        calculated if N is given) For square matrices n is almost always m.
4156 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4157 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4158 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4159 .   j - column indices
4160 -   a - matrix values
4161 
4162    Output Parameter:
4163 .   mat - the matrix
4164 
4165    Level: intermediate
4166 
4167    Notes:
4168        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4169      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4170      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4171 
4172        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4173 
4174        The format used for the sparse matrix input is equivalent to a
4175     row-major ordering, i.e., for the following matrix, the input data expected is
4176     as shown below.
4177 
4178 $        1 0 0
4179 $        2 0 3     P0
4180 $       -------
4181 $        4 5 6     P1
4182 $
4183 $     Process0 [P0]: rows_owned=[0,1]
4184 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4185 $        j =  {0,0,2}  [size = 3]
4186 $        v =  {1,2,3}  [size = 3]
4187 $
4188 $     Process1 [P1]: rows_owned=[2]
4189 $        i =  {0,3}    [size = nrow+1  = 1+1]
4190 $        j =  {0,1,2}  [size = 3]
4191 $        v =  {4,5,6}  [size = 3]
4192 
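       A minimal usage sketch for process 0 of the example above (comm and A are placeholder names;
    error checking omitted); process 1 passes its own arrays and local row count analogously:

.vb
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
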
4193 .keywords: matrix, aij, compressed row, sparse, parallel
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4197 @*/
4198 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4199 {
4200   PetscErrorCode ierr;
4201 
4202   PetscFunctionBegin;
4203   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4204   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4205   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4206   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4207   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4208   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4209   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 /*@C
4214    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4215    (the default parallel PETSc format).  For good matrix assembly performance
4216    the user should preallocate the matrix storage by setting the parameters
4217    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4218    performance can be increased by more than a factor of 50.
4219 
4220    Collective on MPI_Comm
4221 
4222    Input Parameters:
4223 +  comm - MPI communicator
4224 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4225            This value should be the same as the local size used in creating the
4226            y vector for the matrix-vector product y = Ax.
4227 .  n - This value should be the same as the local size used in creating the
4228        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4229        calculated if N is given) For square matrices n is almost always m.
4230 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4231 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4232 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4233            (same value is used for all local rows)
4234 .  d_nnz - array containing the number of nonzeros in the various rows of the
4235            DIAGONAL portion of the local submatrix (possibly different for each row)
4236            or NULL, if d_nz is used to specify the nonzero structure.
4237            The size of this array is equal to the number of local rows, i.e 'm'.
4238 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4239            submatrix (same value is used for all local rows).
4240 -  o_nnz - array containing the number of nonzeros in the various rows of the
4241            OFF-DIAGONAL portion of the local submatrix (possibly different for
4242            each row) or NULL, if o_nz is used to specify the nonzero
4243            structure. The size of this array is equal to the number
4244            of local rows, i.e 'm'.
4245 
4246    Output Parameter:
4247 .  A - the matrix
4248 
4249    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4250    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4251    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4252 
4253    Notes:
4254    If the *_nnz parameter is given then the *_nz parameter is ignored
4255 
4256    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4257    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4258    storage requirements for this matrix.
4259 
4260    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4261    processor then it must be used on all processors that share the object for
4262    that argument.
4263 
4264    The user MUST specify either the local or global matrix dimensions
4265    (possibly both).
4266 
4267    The parallel matrix is partitioned across processors such that the
4268    first m0 rows belong to process 0, the next m1 rows belong to
4269    process 1, the next m2 rows belong to process 2, etc., where
4270    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
4271    values corresponding to an [m x N] submatrix.
4272 
4273    The columns are logically partitioned with the n0 columns belonging
4274    to 0th partition, the next n1 columns belonging to the next
4275    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4276 
4277    The DIAGONAL portion of the local submatrix on any given processor
4278    is the submatrix corresponding to the rows and columns m,n owned by
4279    the given processor, i.e., the diagonal matrix on
4280    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4281    etc. The remaining portion of the local submatrix [m x (N-n)]
4282    constitute the OFF-DIAGONAL portion. The example below better
4283    illustrates this concept.
4284 
4285    For a square global matrix we define each processor's diagonal portion
4286    to be its local rows and the corresponding columns (a square submatrix);
4287    each processor's off-diagonal portion encompasses the remainder of the
4288    local matrix (a rectangular submatrix).
4289 
4290    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4291 
4292    When calling this routine with a single process communicator, a matrix of
4293    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4294    type of communicator, use the construction mechanism
4295 .vb
4296      MatCreate(...,&A);
4297      MatSetType(A,MATMPIAIJ);
4298      MatSetSizes(A, m,n,M,N);
4299      MatMPIAIJSetPreallocation(A,...);
4300 .ve
4301 
4304    By default, this format uses inodes (identical nodes) when possible.
4305    We search for consecutive rows with the same nonzero structure, thereby
4306    reusing matrix information to achieve increased efficiency.
4307 
4308    Options Database Keys:
4309 +  -mat_no_inode  - Do not use inodes
4310 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4311 
4314    Example usage:
4315 
4316    Consider the following 8x8 matrix with 34 non-zero values, that is
4317    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4318    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4319    as follows
4320 
4321 .vb
4322             1  2  0  |  0  3  0  |  0  4
4323     Proc0   0  5  6  |  7  0  0  |  8  0
4324             9  0 10  | 11  0  0  | 12  0
4325     -------------------------------------
4326            13  0 14  | 15 16 17  |  0  0
4327     Proc1   0 18  0  | 19 20 21  |  0  0
4328             0  0  0  | 22 23  0  | 24  0
4329     -------------------------------------
4330     Proc2  25 26 27  |  0  0 28  | 29  0
4331            30  0  0  | 31 32 33  |  0 34
4332 .ve
4333 
4334    This can be represented as a collection of submatrices as
4335 
4336 .vb
4337       A B C
4338       D E F
4339       G H I
4340 .ve
4341 
4342    Where the submatrices A,B,C are owned by proc0, D,E,F are
4343    owned by proc1, G,H,I are owned by proc2.
4344 
4345    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4346    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4347    The 'M','N' parameters are 8,8, and have the same values on all procs.
4348 
4349    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4350    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4351    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4352    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4353    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4354    matrix, and [DF] as another SeqAIJ matrix.
4355 
4356    When d_nz, o_nz parameters are specified, d_nz storage elements are
4357    allocated for every row of the local diagonal submatrix, and o_nz
4358    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4359    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4360    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4361    In this case, the values of d_nz,o_nz are
4362 .vb
4363      proc0 : dnz = 2, o_nz = 2
4364      proc1 : dnz = 3, o_nz = 2
4365      proc2 : dnz = 1, o_nz = 4
4366 .ve
4367    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4368    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4369    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4370    34 values.
4371 
4372    When d_nnz, o_nnz parameters are specified, the storage is specified
4373    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4374    In the above case the values for d_nnz,o_nnz are
4375 .vb
4376      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4377      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4378      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4379 .ve
4380    Here the space allocated is the sum of all the above values, i.e., 34, and
4381    hence the preallocation is perfect.
4382 
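   As a minimal sketch (comm and A are placeholder names; error checking omitted), proc0 of the
   example above could create and preallocate its part of the matrix with the d_nnz/o_nnz values listed:

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
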
4383    Level: intermediate
4384 
4385 .keywords: matrix, aij, compressed row, sparse, parallel
4386 
4387 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4388           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4389 @*/
4390 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4391 {
4392   PetscErrorCode ierr;
4393   PetscMPIInt    size;
4394 
4395   PetscFunctionBegin;
4396   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4397   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4399   if (size > 1) {
4400     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4401     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4402   } else {
4403     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4404     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4405   }
4406   PetscFunctionReturn(0);
4407 }
4408 
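/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of a MATMPIAIJ matrix, along with the
   map (colmap) from the local columns of Ao to global column numbers; any output argument may be NULL */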
4409 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4410 {
4411   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4412   PetscBool      flg;
4413   PetscErrorCode ierr;
4414 
4415   PetscFunctionBegin;
4416   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4417   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4418   if (Ad)     *Ad     = a->A;
4419   if (Ao)     *Ao     = a->B;
4420   if (colmap) *colmap = a->garray;
4421   PetscFunctionReturn(0);
4422 }
4423 
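/* Stacks one sequential AIJ matrix per process into a parallel AIJ matrix on comm; with
   MAT_INITIAL_MATRIX the output matrix is created and preallocated (symbolic phase) before the values
   are inserted, with MAT_REUSE_MATRIX only the values are re-inserted (numeric phase) */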
4424 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4425 {
4426   PetscErrorCode ierr;
4427   PetscInt       m,N,i,rstart,nnz,Ii;
4428   PetscInt       *indx;
4429   PetscScalar    *values;
4430 
4431   PetscFunctionBegin;
4432   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4433   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4434     PetscInt       *dnz,*onz,sum,bs,cbs;
4435 
4436     if (n == PETSC_DECIDE) {
4437       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4438     }
4439     /* Check sum(n) = N */
4440     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4441     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4442 
4443     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4444     rstart -= m;
4445 
4446     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4447     for (i=0; i<m; i++) {
4448       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4449       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4450       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4451     }
4452 
4453     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4454     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4455     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4456     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4457     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4458     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4459     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4460     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4461   }
4462 
4463   /* numeric phase */
4464   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4465   for (i=0; i<m; i++) {
4466     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4467     Ii   = i + rstart;
4468     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4469     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4470   }
4471   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4472   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4473   PetscFunctionReturn(0);
4474 }
4475 
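/* Copies the local rows of A into a sequential matrix and appends that matrix to the binary file
   <outfile>.<rank>, producing one file per process */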
4476 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4477 {
4478   PetscErrorCode    ierr;
4479   PetscMPIInt       rank;
4480   PetscInt          m,N,i,rstart,nnz;
4481   size_t            len;
4482   const PetscInt    *indx;
4483   PetscViewer       out;
4484   char              *name;
4485   Mat               B;
4486   const PetscScalar *values;
4487 
4488   PetscFunctionBegin;
4489   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4490   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4491   /* Should this be the type of the diagonal block of A? */
4492   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4493   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4494   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4495   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4496   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4497   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4498   for (i=0; i<m; i++) {
4499     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4500     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4501     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4502   }
4503   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4504   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4505 
4506   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4507   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4508   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4509   sprintf(name,"%s.%d",outfile,rank);
4510   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4511   ierr = PetscFree(name);CHKERRQ(ierr);
4512   ierr = MatView(B,out);CHKERRQ(ierr);
4513   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4514   ierr = MatDestroy(&B);CHKERRQ(ierr);
4515   PetscFunctionReturn(0);
4516 }
4517 
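/* Destroy routine installed on matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(); frees the attached
   Mat_Merge_SeqsToMPI support structure and then calls the standard MatDestroy_MPIAIJ() */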
4518 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4519 {
4520   PetscErrorCode      ierr;
4521   Mat_Merge_SeqsToMPI *merge;
4522   PetscContainer      container;
4523 
4524   PetscFunctionBegin;
4525   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4526   if (container) {
4527     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4528     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4529     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4530     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4531     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4532     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4533     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4534     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4535     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4536     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4537     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4538     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4539     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4540     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4541     ierr = PetscFree(merge);CHKERRQ(ierr);
4542     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4543   }
4544   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 #include <../src/mat/utils/freespace.h>
4549 #include <petscbt.h>
4550 
4551 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4552 {
4553   PetscErrorCode      ierr;
4554   MPI_Comm            comm;
4555   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4556   PetscMPIInt         size,rank,taga,*len_s;
4557   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4558   PetscInt            proc,m;
4559   PetscInt            **buf_ri,**buf_rj;
4560   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4561   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4562   MPI_Request         *s_waits,*r_waits;
4563   MPI_Status          *status;
4564   MatScalar           *aa=a->a;
4565   MatScalar           **abuf_r,*ba_i;
4566   Mat_Merge_SeqsToMPI *merge;
4567   PetscContainer      container;
4568 
4569   PetscFunctionBegin;
4570   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4571   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4572 
4573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4574   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4575 
4576   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4577   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4578 
4579   bi     = merge->bi;
4580   bj     = merge->bj;
4581   buf_ri = merge->buf_ri;
4582   buf_rj = merge->buf_rj;
4583 
4584   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4585   owners = merge->rowmap->range;
4586   len_s  = merge->len_s;
4587 
4588   /* send and recv matrix values */
4589   /*-----------------------------*/
4590   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4591   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4592 
4593   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4594   for (proc=0,k=0; proc<size; proc++) {
4595     if (!len_s[proc]) continue;
4596     i    = owners[proc];
4597     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4598     k++;
4599   }
4600 
4601   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4602   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4603   ierr = PetscFree(status);CHKERRQ(ierr);
4604 
4605   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4606   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4607 
4608   /* insert mat values of mpimat */
4609   /*----------------------------*/
4610   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4611   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4612 
4613   for (k=0; k<merge->nrecv; k++) {
4614     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4615     nrows       = *(buf_ri_k[k]);
4616     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4617     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4618   }
4619 
4620   /* set values of ba */
4621   m = merge->rowmap->n;
4622   for (i=0; i<m; i++) {
4623     arow = owners[rank] + i;
4624     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4625     bnzi = bi[i+1] - bi[i];
4626     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4627 
4628     /* add local non-zero vals of this proc's seqmat into ba */
4629     anzi   = ai[arow+1] - ai[arow];
4630     aj     = a->j + ai[arow];
4631     aa     = a->a + ai[arow];
4632     nextaj = 0;
4633     for (j=0; nextaj<anzi; j++) {
4634       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4635         ba_i[j] += aa[nextaj++];
4636       }
4637     }
4638 
4639     /* add received vals into ba */
4640     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4641       /* i-th row */
4642       if (i == *nextrow[k]) {
4643         anzi   = *(nextai[k]+1) - *nextai[k];
4644         aj     = buf_rj[k] + *(nextai[k]);
4645         aa     = abuf_r[k] + *(nextai[k]);
4646         nextaj = 0;
4647         for (j=0; nextaj<anzi; j++) {
4648           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4649             ba_i[j] += aa[nextaj++];
4650           }
4651         }
4652         nextrow[k]++; nextai[k]++;
4653       }
4654     }
4655     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4656   }
4657   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4658   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4659 
4660   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4661   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4662   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4663   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4664   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4665   PetscFunctionReturn(0);
4666 }
4667 
4668 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4669 {
4670   PetscErrorCode      ierr;
4671   Mat                 B_mpi;
4672   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4673   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4674   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4675   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4676   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4677   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4678   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4679   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4680   MPI_Status          *status;
4681   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4682   PetscBT             lnkbt;
4683   Mat_Merge_SeqsToMPI *merge;
4684   PetscContainer      container;
4685 
4686   PetscFunctionBegin;
4687   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4688 
4689   /* make sure it is a PETSc comm */
4690   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4692   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4693 
4694   ierr = PetscNew(&merge);CHKERRQ(ierr);
4695   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4696 
4697   /* determine row ownership */
4698   /*---------------------------------------------------------*/
4699   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4700   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4701   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4702   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4703   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4704   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4705   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4706 
4707   m      = merge->rowmap->n;
4708   owners = merge->rowmap->range;
4709 
4710   /* determine the number of messages to send, their lengths */
4711   /*---------------------------------------------------------*/
4712   len_s = merge->len_s;
4713 
4714   len          = 0; /* length of buf_si[] */
4715   merge->nsend = 0;
4716   for (proc=0; proc<size; proc++) {
4717     len_si[proc] = 0;
4718     if (proc == rank) {
4719       len_s[proc] = 0;
4720     } else {
4721       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4722       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4723     }
4724     if (len_s[proc]) {
4725       merge->nsend++;
4726       nrows = 0;
4727       for (i=owners[proc]; i<owners[proc+1]; i++) {
4728         if (ai[i+1] > ai[i]) nrows++;
4729       }
4730       len_si[proc] = 2*(nrows+1);
4731       len         += len_si[proc];
4732     }
4733   }
4734 
4735   /* determine the number and length of messages to receive for ij-structure */
4736   /*-------------------------------------------------------------------------*/
4737   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4738   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4739 
4740   /* post the Irecv of j-structure */
4741   /*-------------------------------*/
4742   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4743   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4744 
4745   /* post the Isend of j-structure */
4746   /*--------------------------------*/
4747   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4748 
4749   for (proc=0, k=0; proc<size; proc++) {
4750     if (!len_s[proc]) continue;
4751     i    = owners[proc];
4752     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4753     k++;
4754   }
4755 
4756   /* receives and sends of j-structure are complete */
4757   /*------------------------------------------------*/
4758   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4759   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4760 
4761   /* send and recv i-structure */
4762   /*---------------------------*/
4763   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4764   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4765 
4766   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4767   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4768   for (proc=0,k=0; proc<size; proc++) {
4769     if (!len_s[proc]) continue;
4770     /* form outgoing message for i-structure:
4771          buf_si[0]:                 nrows to be sent
4772                [1:nrows]:           row index (global)
4773                [nrows+1:2*nrows+1]: i-structure index
4774     */
4775     /*-------------------------------------------*/
4776     nrows       = len_si[proc]/2 - 1;
4777     buf_si_i    = buf_si + nrows+1;
4778     buf_si[0]   = nrows;
4779     buf_si_i[0] = 0;
4780     nrows       = 0;
4781     for (i=owners[proc]; i<owners[proc+1]; i++) {
4782       anzi = ai[i+1] - ai[i];
4783       if (anzi) {
4784         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4785         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4786         nrows++;
4787       }
4788     }
4789     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4790     k++;
4791     buf_si += len_si[proc];
4792   }
4793 
4794   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4795   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4796 
4797   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4798   for (i=0; i<merge->nrecv; i++) {
4799     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4800   }
4801 
4802   ierr = PetscFree(len_si);CHKERRQ(ierr);
4803   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4804   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4805   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4806   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4807   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4808   ierr = PetscFree(status);CHKERRQ(ierr);
4809 
4810   /* compute a local seq matrix in each processor */
4811   /*----------------------------------------------*/
4812   /* allocate bi array and free space for accumulating nonzero column info */
4813   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4814   bi[0] = 0;
4815 
4816   /* create and initialize a linked list */
4817   nlnk = N+1;
4818   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4819 
4820   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4821   len  = ai[owners[rank+1]] - ai[owners[rank]];
4822   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4823 
4824   current_space = free_space;
4825 
4826   /* determine symbolic info for each local row */
4827   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4828 
4829   for (k=0; k<merge->nrecv; k++) {
4830     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4831     nrows       = *buf_ri_k[k];
4832     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4833     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4834   }
4835 
4836   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4837   len  = 0;
4838   for (i=0; i<m; i++) {
4839     bnzi = 0;
4840     /* add local non-zero cols of this proc's seqmat into lnk */
4841     arow  = owners[rank] + i;
4842     anzi  = ai[arow+1] - ai[arow];
4843     aj    = a->j + ai[arow];
4844     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4845     bnzi += nlnk;
4846     /* add received col data into lnk */
4847     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4848       if (i == *nextrow[k]) { /* i-th row */
4849         anzi  = *(nextai[k]+1) - *nextai[k];
4850         aj    = buf_rj[k] + *nextai[k];
4851         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4852         bnzi += nlnk;
4853         nextrow[k]++; nextai[k]++;
4854       }
4855     }
4856     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4857 
4858     /* if free space is not available, make more free space */
4859     if (current_space->local_remaining<bnzi) {
4860       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4861       nspacedouble++;
4862     }
4863     /* copy data into free space, then initialize lnk */
4864     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4865     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4866 
4867     current_space->array           += bnzi;
4868     current_space->local_used      += bnzi;
4869     current_space->local_remaining -= bnzi;
4870 
4871     bi[i+1] = bi[i] + bnzi;
4872   }
4873 
4874   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4875 
4876   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4877   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4878   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4879 
4880   /* create symbolic parallel matrix B_mpi */
4881   /*---------------------------------------*/
4882   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4883   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4884   if (n==PETSC_DECIDE) {
4885     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4886   } else {
4887     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4888   }
4889   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4890   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4891   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4892   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4893   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4894 
4895   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4896   B_mpi->assembled    = PETSC_FALSE;
4897   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4898   merge->bi           = bi;
4899   merge->bj           = bj;
4900   merge->buf_ri       = buf_ri;
4901   merge->buf_rj       = buf_rj;
4902   merge->coi          = NULL;
4903   merge->coj          = NULL;
4904   merge->owners_co    = NULL;
4905 
4906   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4907 
4908   /* attach the supporting struct to B_mpi for reuse */
4909   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4910   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4911   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4912   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4913   *mpimat = B_mpi;
4914 
4915   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4916   PetscFunctionReturn(0);
4917 }
4918 
4919 /*@C
4920       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4921                  matrices from each processor
4922 
4923     Collective on MPI_Comm
4924 
4925    Input Parameters:
4926 +    comm - the communicator the parallel matrix will live on
4927 .    seqmat - the input sequential matrix (one per process)
4928 .    m - number of local rows (or PETSC_DECIDE)
4929 .    n - number of local columns (or PETSC_DECIDE)
4930 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 
4932    Output Parameter:
4933 .    mpimat - the parallel matrix generated
4934 
4935     Level: advanced
4936 
4937    Notes:
4938      The dimensions of the sequential matrix in each processor MUST be the same.
4939      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4940      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
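
     A minimal usage sketch (M, N, maxnz, and comm are placeholder names; each process fills its own
     seqmat, of identical global size on every process, and the contributions are summed into mpimat):
.vb
     Mat seqmat,mpimat;
     MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,maxnz,NULL,&seqmat);
     MatSetValues(seqmat,...);
     MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve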
4941 @*/
4942 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4943 {
4944   PetscErrorCode ierr;
4945   PetscMPIInt    size;
4946 
4947   PetscFunctionBegin;
4948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4949   if (size == 1) {
4950     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4951     if (scall == MAT_INITIAL_MATRIX) {
4952       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4953     } else {
4954       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4955     }
4956     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4957     PetscFunctionReturn(0);
4958   }
4959   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4960   if (scall == MAT_INITIAL_MATRIX) {
4961     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4962   }
4963   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4964   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4965   PetscFunctionReturn(0);
4966 }
4967 
4968 /*@
4969      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4970           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4971           with MatGetSize().
4972 
4973     Not Collective
4974 
4975    Input Parameters:
4976 +    A - the matrix
4977 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4978 
4979    Output Parameter:
4980 .    A_loc - the local sequential matrix generated
4981 
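   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ...
     MatDestroy(&A_loc);
.ve
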
4982     Level: developer
4983 
4984 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4985 
4986 @*/
4987 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4988 {
4989   PetscErrorCode ierr;
4990   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4991   Mat_SeqAIJ     *mat,*a,*b;
4992   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4993   MatScalar      *aa,*ba,*cam;
4994   PetscScalar    *ca;
4995   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4996   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4997   PetscBool      match;
4998   MPI_Comm       comm;
4999   PetscMPIInt    size;
5000 
5001   PetscFunctionBegin;
5002   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5003   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5004   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5006   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5007 
5008   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5009   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5010   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5011   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5012   aa = a->a; ba = b->a;
5013   if (scall == MAT_INITIAL_MATRIX) {
5014     if (size == 1) {
5015       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5016       PetscFunctionReturn(0);
5017     }
5018 
5019     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5020     ci[0] = 0;
5021     for (i=0; i<am; i++) {
5022       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5023     }
5024     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5025     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5026     k    = 0;
5027     for (i=0; i<am; i++) {
5028       ncols_o = bi[i+1] - bi[i];
5029       ncols_d = ai[i+1] - ai[i];
5030       /* off-diagonal portion of A */
5031       for (jo=0; jo<ncols_o; jo++) {
5032         col = cmap[*bj];
5033         if (col >= cstart) break;
5034         cj[k]   = col; bj++;
5035         ca[k++] = *ba++;
5036       }
5037       /* diagonal portion of A */
5038       for (j=0; j<ncols_d; j++) {
5039         cj[k]   = cstart + *aj++;
5040         ca[k++] = *aa++;
5041       }
5042       /* off-diagonal portion of A */
5043       for (j=jo; j<ncols_o; j++) {
5044         cj[k]   = cmap[*bj++];
5045         ca[k++] = *ba++;
5046       }
5047     }
5048     /* put together the new matrix */
5049     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5050     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5051     /* Since these are PETSc arrays, change flags to free them as necessary. */
5052     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5053     mat->free_a  = PETSC_TRUE;
5054     mat->free_ij = PETSC_TRUE;
5055     mat->nonew   = 0;
5056   } else if (scall == MAT_REUSE_MATRIX) {
5057     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5058     ci = mat->i; cj = mat->j; cam = mat->a;
5059     for (i=0; i<am; i++) {
5060       /* off-diagonal portion of A */
5061       ncols_o = bi[i+1] - bi[i];
5062       for (jo=0; jo<ncols_o; jo++) {
5063         col = cmap[*bj];
5064         if (col >= cstart) break;
5065         *cam++ = *ba++; bj++;
5066       }
5067       /* diagonal portion of A */
5068       ncols_d = ai[i+1] - ai[i];
5069       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5070       /* off-diagonal portion of A */
5071       for (j=jo; j<ncols_o; j++) {
5072         *cam++ = *ba++; bj++;
5073       }
5074     }
5075   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5076   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5077   PetscFunctionReturn(0);
5078 }
5079 
5080 /*@C
5081      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all of its local rows and only its nonzero columns
5082 
5083     Not Collective
5084 
5085    Input Parameters:
5086 +    A - the matrix
5087 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5088 -    row, col - index sets of rows and columns to extract (or NULL)
5089 
5090    Output Parameter:
5091 .    A_loc - the local sequential matrix generated
5092 
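   Sample usage, a sketch (passing NULL for row and col lets the routine select all local rows and the locally nonzero columns):
.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
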
5093     Level: developer
5094 
5095 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5096 
5097 @*/
5098 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5099 {
5100   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5101   PetscErrorCode ierr;
5102   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5103   IS             isrowa,iscola;
5104   Mat            *aloc;
5105   PetscBool      match;
5106 
5107   PetscFunctionBegin;
5108   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5109   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5110   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5111   if (!row) {
5112     start = A->rmap->rstart; end = A->rmap->rend;
5113     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5114   } else {
5115     isrowa = *row;
5116   }
5117   if (!col) {
5118     start = A->cmap->rstart;
5119     cmap  = a->garray;
5120     nzA   = a->A->cmap->n;
5121     nzB   = a->B->cmap->n;
5122     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5123     ncols = 0;
5124     for (i=0; i<nzB; i++) {
5125       if (cmap[i] < start) idx[ncols++] = cmap[i];
5126       else break;
5127     }
5128     imark = i;
5129     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5130     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5131     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5132   } else {
5133     iscola = *col;
5134   }
5135   if (scall != MAT_INITIAL_MATRIX) {
5136     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5137     aloc[0] = *A_loc;
5138   }
5139   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5140   if (!col) { /* attach global id of condensed columns */
5141     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5142   }
5143   *A_loc = aloc[0];
5144   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5145   if (!row) {
5146     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5147   }
5148   if (!col) {
5149     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5150   }
5151   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5152   PetscFunctionReturn(0);
5153 }
5154 
5155 /*@C
5156     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5157 
5158     Collective on Mat
5159 
5160    Input Parameters:
5161 +    A,B - the matrices in mpiaij format
5162 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5163 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5164 
5165    Output Parameter:
5166 +    rowb, colb - index sets of rows and columns of B to extract
5167 -    B_seq - the sequential matrix generated
5168 
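   Sample usage, a sketch (the index sets and the sequential matrix returned for MAT_INITIAL_MATRIX are passed back for MAT_REUSE_MATRIX and destroyed by the caller):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
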
5169     Level: developer
5170 
5171 @*/
5172 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5173 {
5174   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5175   PetscErrorCode ierr;
5176   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5177   IS             isrowb,iscolb;
5178   Mat            *bseq=NULL;
5179 
5180   PetscFunctionBegin;
5181   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5182     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5183   }
5184   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5185 
5186   if (scall == MAT_INITIAL_MATRIX) {
5187     start = A->cmap->rstart;
5188     cmap  = a->garray;
5189     nzA   = a->A->cmap->n;
5190     nzB   = a->B->cmap->n;
5191     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5192     ncols = 0;
5193     for (i=0; i<nzB; i++) {  /* row < local row index */
5194       if (cmap[i] < start) idx[ncols++] = cmap[i];
5195       else break;
5196     }
5197     imark = i;
5198     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5199     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5200     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5201     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5202   } else {
5203     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5204     isrowb  = *rowb; iscolb = *colb;
5205     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5206     bseq[0] = *B_seq;
5207   }
5208   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5209   *B_seq = bseq[0];
5210   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5211   if (!rowb) {
5212     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5213   } else {
5214     *rowb = isrowb;
5215   }
5216   if (!colb) {
5217     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5218   } else {
5219     *colb = iscolb;
5220   }
5221   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5222   PetscFunctionReturn(0);
5223 }
5224 
5225 /*
5226     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5227     of the OFF-DIAGONAL portion of local A
5228 
5229     Collective on Mat
5230 
5231    Input Parameters:
5232 +    A,B - the matrices in mpiaij format
5233 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5234 
5235    Output Parameter:
5236 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5237 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5238 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5239 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5240 
5241     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
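    Sample usage, a sketch of the intended calling pattern (the caller keeps the returned buffers and frees them, together with B_oth, when done):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... after the numerical values of B change ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
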
5242      for this matrix. This is not desirable..
5243      for this matrix. This is not desirable.
5244     Level: developer
5245 
5246 */
5247 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5248 {
5249   PetscErrorCode         ierr;
5250   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5251   Mat_SeqAIJ             *b_oth;
5252   VecScatter             ctx;
5253   MPI_Comm               comm;
5254   const PetscMPIInt      *rprocs,*sprocs;
5255   const PetscInt         *srow,*rstarts,*sstarts;
5256   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5257   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5258   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5259   MPI_Request            *rwaits = NULL,*swaits = NULL;
5260   MPI_Status             rstatus;
5261   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5262 
5263   PetscFunctionBegin;
5264   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5265   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5266 
5267   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5268     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5269   }
5270   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5272 
5273   if (size == 1) {
5274     startsj_s = NULL;
5275     bufa_ptr  = NULL;
5276     *B_oth    = NULL;
5277     PetscFunctionReturn(0);
5278   }
5279 
5280   ctx = a->Mvctx;
5281   tag = ((PetscObject)ctx)->tag;
5282 
5283   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
5284   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5285   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5286   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5287   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5288   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5289   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5290 
5291   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5292   if (scall == MAT_INITIAL_MATRIX) {
5293     /* i-array */
5294     /*---------*/
5295     /*  post receives */
5296     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5297     for (i=0; i<nrecvs; i++) {
5298       rowlen = rvalues + rstarts[i]*rbs;
5299       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5300       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5301     }
5302 
5303     /* pack the outgoing message */
5304     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5305 
5306     sstartsj[0] = 0;
5307     rstartsj[0] = 0;
5308     len         = 0; /* total length of j or a array to be sent */
5309     if (nsends) {
5310       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5311       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5312     }
5313     for (i=0; i<nsends; i++) {
5314       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5315       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5316       for (j=0; j<nrows; j++) {
5317         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5318         for (l=0; l<sbs; l++) {
5319           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5320 
5321           rowlen[j*sbs+l] = ncols;
5322 
5323           len += ncols;
5324           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5325         }
5326         k++;
5327       }
5328       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5329 
5330       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5331     }
5332     /* recvs and sends of i-array are completed */
5333     i = nrecvs;
5334     while (i--) {
5335       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5336     }
5337     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5338     ierr = PetscFree(svalues);CHKERRQ(ierr);
5339 
5340     /* allocate buffers for sending j and a arrays */
5341     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5342     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5343 
5344     /* create i-array of B_oth */
5345     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5346 
5347     b_othi[0] = 0;
5348     len       = 0; /* total length of j or a array to be received */
5349     k         = 0;
5350     for (i=0; i<nrecvs; i++) {
5351       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5352       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5353       for (j=0; j<nrows; j++) {
5354         b_othi[k+1] = b_othi[k] + rowlen[j];
5355         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5356         k++;
5357       }
5358       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5359     }
5360     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5361 
5362     /* allocate space for j and a arrrays of B_oth */
5363     /* allocate space for j and a arrays of B_oth */
5364     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5365 
5366     /* j-array */
5367     /*---------*/
5368     /*  post receives of j-array */
5369     for (i=0; i<nrecvs; i++) {
5370       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5371       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5372     }
5373 
5374     /* pack the outgoing message j-array */
5375     if (nsends) k = sstarts[0];
5376     for (i=0; i<nsends; i++) {
5377       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5378       bufJ  = bufj+sstartsj[i];
5379       for (j=0; j<nrows; j++) {
5380         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5381         for (ll=0; ll<sbs; ll++) {
5382           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5383           for (l=0; l<ncols; l++) {
5384             *bufJ++ = cols[l];
5385           }
5386           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5387         }
5388       }
5389       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5390     }
5391 
5392     /* recvs and sends of j-array are completed */
5393     i = nrecvs;
5394     while (i--) {
5395       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5396     }
5397     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5398   } else if (scall == MAT_REUSE_MATRIX) {
5399     sstartsj = *startsj_s;
5400     rstartsj = *startsj_r;
5401     bufa     = *bufa_ptr;
5402     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5403     b_otha   = b_oth->a;
5404   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5405 
5406   /* a-array */
5407   /*---------*/
5408   /*  post receives of a-array */
5409   for (i=0; i<nrecvs; i++) {
5410     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5411     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5412   }
5413 
5414   /* pack the outgoing message a-array */
5415   if (nsends) k = sstarts[0];
5416   for (i=0; i<nsends; i++) {
5417     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5418     bufA  = bufa+sstartsj[i];
5419     for (j=0; j<nrows; j++) {
5420       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5421       for (ll=0; ll<sbs; ll++) {
5422         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5423         for (l=0; l<ncols; l++) {
5424           *bufA++ = vals[l];
5425         }
5426         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5427       }
5428     }
5429     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5430   }
5431   /* recvs and sends of a-array are completed */
5432   i = nrecvs;
5433   while (i--) {
5434     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5435   }
5436   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5437   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5438 
5439   if (scall == MAT_INITIAL_MATRIX) {
5440     /* put together the new matrix */
5441     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5442 
5443     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5444     /* Since these are PETSc arrays, change flags to free them as necessary. */
5445     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5446     b_oth->free_a  = PETSC_TRUE;
5447     b_oth->free_ij = PETSC_TRUE;
5448     b_oth->nonew   = 0;
5449 
5450     ierr = PetscFree(bufj);CHKERRQ(ierr);
5451     if (!startsj_s || !bufa_ptr) {
5452       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5453       ierr = PetscFree(bufa);CHKERRQ(ierr);
5454     } else {
5455       *startsj_s = sstartsj;
5456       *startsj_r = rstartsj;
5457       *bufa_ptr  = bufa;
5458     }
5459   }
5460 
5461   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5462   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5463   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5464   PetscFunctionReturn(0);
5465 }
5466 
5467 /*@C
5468   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5469 
5470   Not Collective
5471 
5472   Input Parameter:
5473 . A - The matrix in mpiaij format
5474 
5475   Output Parameters:
5476 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5477 . colmap - A map from global column index to local index into lvec
5478 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5479 
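  Sample usage, a sketch (the PETSC_USE_CTABLE branch changes only the type used for colmap):
.vb
      Vec        lvec;
      VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
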
5480   Level: developer
5481 
5482 @*/
5483 #if defined(PETSC_USE_CTABLE)
5484 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5485 #else
5486 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5487 #endif
5488 {
5489   Mat_MPIAIJ *a;
5490 
5491   PetscFunctionBegin;
5492   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5493   PetscValidPointer(lvec, 2);
5494   PetscValidPointer(colmap, 3);
5495   PetscValidPointer(multScatter, 4);
5496   a = (Mat_MPIAIJ*) A->data;
5497   if (lvec) *lvec = a->lvec;
5498   if (colmap) *colmap = a->colmap;
5499   if (multScatter) *multScatter = a->Mvctx;
5500   PetscFunctionReturn(0);
5501 }
5502 
5503 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5504 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5505 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5506 #if defined(PETSC_HAVE_MKL_SPARSE)
5507 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5508 #endif
5509 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5510 #if defined(PETSC_HAVE_ELEMENTAL)
5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5512 #endif
5513 #if defined(PETSC_HAVE_HYPRE)
5514 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5515 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5516 #endif
5517 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5519 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5520 
5521 /*
5522     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product directly is untenable
5523 
5524                n                       p                          p
5525         (              )       (              )         (                  )
5526       m (      A       )  *  n (       B      )   =   m (         C        )
5527         (              )       (              )         (                  )
5528 
5529 */
5530 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5531 {
5532   PetscErrorCode ierr;
5533   Mat            At,Bt,Ct;
5534 
5535   PetscFunctionBegin;
5536   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5537   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5538   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5539   ierr = MatDestroy(&At);CHKERRQ(ierr);
5540   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5541   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5542   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5543   PetscFunctionReturn(0);
5544 }
5545 
5546 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5547 {
5548   PetscErrorCode ierr;
5549   PetscInt       m=A->rmap->n,n=B->cmap->n;
5550   Mat            Cmat;
5551 
5552   PetscFunctionBegin;
5553   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5554   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5555   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5556   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5557   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5558   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5559   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5560   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5561 
5562   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5563 
5564   *C = Cmat;
5565   PetscFunctionReturn(0);
5566 }
5567 
5568 /* ----------------------------------------------------------------*/
5569 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5570 {
5571   PetscErrorCode ierr;
5572 
5573   PetscFunctionBegin;
5574   if (scall == MAT_INITIAL_MATRIX) {
5575     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5576     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5577     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5578   }
5579   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5580   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5581   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5582   PetscFunctionReturn(0);
5583 }
5584 
5585 /*MC
5586    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5587 
5588    Options Database Keys:
5589 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5590 
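   Sample usage, a minimal creation sketch (the global sizes M, N and the preallocation counts are illustrative):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
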
5591   Level: beginner
5592 
5593 .seealso: MatCreateAIJ()
5594 M*/
5595 
5596 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5597 {
5598   Mat_MPIAIJ     *b;
5599   PetscErrorCode ierr;
5600   PetscMPIInt    size;
5601 
5602   PetscFunctionBegin;
5603   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5604 
5605   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5606   B->data       = (void*)b;
5607   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5608   B->assembled  = PETSC_FALSE;
5609   B->insertmode = NOT_SET_VALUES;
5610   b->size       = size;
5611 
5612   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5613 
5614   /* build cache for off array entries formed */
5615   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5616 
5617   b->donotstash  = PETSC_FALSE;
5618   b->colmap      = NULL;
5619   b->garray      = NULL;
5620   b->roworiented = PETSC_TRUE;
5621 
5622   /* stuff used for matrix vector multiply */
5623   b->lvec  = NULL;
5624   b->Mvctx = NULL;
5625 
5626   /* stuff for MatGetRow() */
5627   b->rowindices   = NULL;
5628   b->rowvalues    = NULL;
5629   b->getrowactive = PETSC_FALSE;
5630 
5631   /* flexible pointer used in CUSP/CUSPARSE classes */
5632   b->spptr = NULL;
5633 
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5635   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5636   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5638   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5643   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
5644 #if defined(PETSC_HAVE_MKL_SPARSE)
5645   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5646 #endif
5647   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5648   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5649 #if defined(PETSC_HAVE_ELEMENTAL)
5650   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5651 #endif
5652 #if defined(PETSC_HAVE_HYPRE)
5653   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5654 #endif
5655   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
5656   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5657   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5658   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5660 #if defined(PETSC_HAVE_HYPRE)
5661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5662 #endif
5663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
5664   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5665   PetscFunctionReturn(0);
5666 }
5667 
5668 /*@C
5669      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5670          and "off-diagonal" part of the matrix in CSR format.
5671 
5672    Collective on MPI_Comm
5673 
5674    Input Parameters:
5675 +  comm - MPI communicator
5676 .  m - number of local rows (Cannot be PETSC_DECIDE)
5677 .  n - This value should be the same as the local size used in creating the
5678        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5679        calculated if N is given). For square matrices n is almost always m.
5680 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5681 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5682 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
5683 .   j - column indices
5684 .   a - matrix values
5685 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
5686 .   oj - column indices
5687 -   oa - matrix values
5688 
5689    Output Parameter:
5690 .   mat - the matrix
5691 
5692    Level: advanced
5693 
5694    Notes:
5695        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5696        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5697        must free these arrays after the matrix has been destroyed, and not before.
5698        The i and j indices are 0 based
5699 
5700        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5701 
5702        This sets local rows and cannot be used to set off-processor values.
5703 
5704        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5705        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5706        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5707        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5708        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5709        communication if it is known that only local entries will be set.
5710 
5711 .keywords: matrix, aij, compressed row, sparse, parallel
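   A sketch of the recommended MatSetValues()-based alternative (the preallocation counts d_nz, o_nz and the per-row ncols/cols/vals arrays are illustrative):
.vb
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
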
5712 
5713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5714           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5715 @*/
5716 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5717 {
5718   PetscErrorCode ierr;
5719   Mat_MPIAIJ     *maij;
5720 
5721   PetscFunctionBegin;
5722   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5723   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5724   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5725   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5726   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5727   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5728   maij = (Mat_MPIAIJ*) (*mat)->data;
5729 
5730   (*mat)->preallocated = PETSC_TRUE;
5731 
5732   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5733   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5734 
5735   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5736   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5737 
5738   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5739   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5740   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5741   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5742 
5743   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5744   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5745   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5746   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5747   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5748   PetscFunctionReturn(0);
5749 }
5750 
5751 /*
5752     Special version for direct calls from Fortran
5753 */
5754 #include <petsc/private/fortranimpl.h>
5755 
5756 /* Change these macros so they can be used in a void function */
5757 #undef CHKERRQ
5758 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5759 #undef SETERRQ2
5760 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5761 #undef SETERRQ3
5762 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5763 #undef SETERRQ
5764 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5765 
5766 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5767 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5768 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5769 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5770 #else
5771 #endif
5772 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5773 {
5774   Mat            mat  = *mmat;
5775   PetscInt       m    = *mm, n = *mn;
5776   InsertMode     addv = *maddv;
5777   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5778   PetscScalar    value;
5779   PetscErrorCode ierr;
5780 
5781   MatCheckPreallocated(mat,1);
5782   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5783 
5784 #if defined(PETSC_USE_DEBUG)
5785   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5786 #endif
5787   {
5788     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5789     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5790     PetscBool roworiented = aij->roworiented;
5791 
5792     /* Some Variables required in the macro */
5793     Mat        A                 = aij->A;
5794     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5795     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5796     MatScalar  *aa               = a->a;
5797     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5798     Mat        B                 = aij->B;
5799     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5800     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5801     MatScalar  *ba               = b->a;
5802 
5803     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5804     PetscInt  nonew = a->nonew;
5805     MatScalar *ap1,*ap2;
5806 
5807     PetscFunctionBegin;
5808     for (i=0; i<m; i++) {
5809       if (im[i] < 0) continue;
5810 #if defined(PETSC_USE_DEBUG)
5811       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5812 #endif
5813       if (im[i] >= rstart && im[i] < rend) {
5814         row      = im[i] - rstart;
5815         lastcol1 = -1;
5816         rp1      = aj + ai[row];
5817         ap1      = aa + ai[row];
5818         rmax1    = aimax[row];
5819         nrow1    = ailen[row];
5820         low1     = 0;
5821         high1    = nrow1;
5822         lastcol2 = -1;
5823         rp2      = bj + bi[row];
5824         ap2      = ba + bi[row];
5825         rmax2    = bimax[row];
5826         nrow2    = bilen[row];
5827         low2     = 0;
5828         high2    = nrow2;
5829 
5830         for (j=0; j<n; j++) {
5831           if (roworiented) value = v[i*n+j];
5832           else value = v[i+j*m];
5833           if (in[j] >= cstart && in[j] < cend) {
5834             col = in[j] - cstart;
5835             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5836             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5837           } else if (in[j] < 0) continue;
5838 #if defined(PETSC_USE_DEBUG)
5839           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5840           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5841 #endif
5842           else {
5843             if (mat->was_assembled) {
5844               if (!aij->colmap) {
5845                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5846               }
5847 #if defined(PETSC_USE_CTABLE)
5848               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5849               col--;
5850 #else
5851               col = aij->colmap[in[j]] - 1;
5852 #endif
5853               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5854               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5855                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5856                 col  =  in[j];
5857                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5858                 B     = aij->B;
5859                 b     = (Mat_SeqAIJ*)B->data;
5860                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5861                 rp2   = bj + bi[row];
5862                 ap2   = ba + bi[row];
5863                 rmax2 = bimax[row];
5864                 nrow2 = bilen[row];
5865                 low2  = 0;
5866                 high2 = nrow2;
5867                 bm    = aij->B->rmap->n;
5868                 ba    = b->a;
5869               }
5870             } else col = in[j];
5871             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5872           }
5873         }
5874       } else if (!aij->donotstash) {
5875         if (roworiented) {
5876           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5877         } else {
5878           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5879         }
5880       }
5881     }
5882   }
5883   PetscFunctionReturnVoid();
5884 }
5885