xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1143afedca6e8bc84e0774f5da5cd1d6e8e46c2e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
23    automatically switches over to using inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
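/*
   Usage sketch (illustrative only, not part of this file): creating a MATAIJ matrix and calling
   both preallocation routines as recommended above, so the same code works on one or many
   processes.  M, N, nz, dnz and onz are placeholders for the global sizes and per-row nonzero
   estimates.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,nz,NULL);CHKERRQ(ierr);            used when the communicator has one process
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);  used when the communicator has several processes
*/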
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
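/*
   Usage sketch (illustrative only): the AIJCRL format is most conveniently selected from the
   options database,

     ./prog -mat_type aijcrl

   after the application calls MatSetFromOptions(A), or programmatically with
   MatSetType(A,MATAIJCRL) before preallocation.
*/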
46 
47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->pinnedtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = 0;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
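/*
   Calling sketch (illustrative only): distribute a sequential matrix gmat, whose numerical
   content is taken from rank 0, so that each rank owns m rows of the result.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved over from rank 0; the nonzero
   pattern set up by the MAT_INITIAL_MATRIX call is kept.
*/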
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine the diagonal and off-diagonal nonzero counts for each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine the diagonal and off-diagonal nonzero counts for each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it, it is not scalable (each process
425 holds an order-N integer array) but access is fast.
426 */
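/*
   Lookup sketch (illustrative only): once the colmap exists, a global column index gcol is
   translated into a local column of the off-diagonal block B the same way it is done below in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ():

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A result of -1 means gcol does not (yet) occur in the off-diagonal block.
*/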
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           goto a_noinsert; \
468         } \
469       }  \
470       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
471       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
472       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
473       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
474       N = nrow1++ - 1; a->nz++; high1++; \
475       /* shift up all the later entries in this row */ \
476       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
477       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
478       rp1[_i] = col;  \
479       ap1[_i] = value;  \
480       A->nonzerostate++;\
481       a_noinsert: ; \
482       ailen[row] = nrow1; \
483 }
484 
485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
486   { \
487     if (col <= lastcol2) low2 = 0;                        \
488     else high2 = nrow2;                                   \
489     lastcol2 = col;                                       \
490     while (high2-low2 > 5) {                              \
491       t = (low2+high2)/2;                                 \
492       if (rp2[t] > col) high2 = t;                        \
493       else             low2  = t;                         \
494     }                                                     \
495     for (_i=low2; _i<high2; _i++) {                       \
496       if (rp2[_i] > col) break;                           \
497       if (rp2[_i] == col) {                               \
498         if (addv == ADD_VALUES) {                         \
499           ap2[_i] += value;                               \
500           (void)PetscLogFlops(1.0);                       \
501         }                                                 \
502         else                    ap2[_i] = value;          \
503         goto b_noinsert;                                  \
504       }                                                   \
505     }                                                     \
506     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
507     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
508     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
509     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
510     N = nrow2++ - 1; b->nz++; high2++;                    \
511     /* shift up all the later entries in this row */      \
512     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
513     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
514     rp2[_i] = col;                                        \
515     ap2[_i] = value;                                      \
516     B->nonzerostate++;                                    \
517     b_noinsert: ;                                         \
518     bilen[row] = nrow2;                                   \
519   }
520 
521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
522 {
523   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
524   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
525   PetscErrorCode ierr;
526   PetscInt       l,*garray = mat->garray,diag;
527 
528   PetscFunctionBegin;
529   /* code only works for square matrices A */
530 
531   /* find size of row to the left of the diagonal part */
532   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
533   row  = row - diag;
534   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
535     if (garray[b->j[b->i[row]+l]] > diag) break;
536   }
537   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
538 
539   /* diagonal part */
540   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
541 
542   /* right of diagonal part */
543   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
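/*
   Layout sketch (illustrative only): MatSetValuesRow_MPIAIJ() above expects v[] to hold the full
   locally owned row in global column order,

     v = [ B entries left of the diagonal block | A (diagonal block) entries | B entries right of the diagonal block ]

   so the first l values go into B, the next a->i[row+1]-a->i[row] values into A, and the rest
   back into B, where l is the number of off-diagonal columns to the left of the owned column
   range.  The public entry point for this operation is MatSetValuesRow().
*/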
546 
547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
548 {
549   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
550   PetscScalar    value = 0.0;
551   PetscErrorCode ierr;
552   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
553   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
554   PetscBool      roworiented = aij->roworiented;
555 
556   /* Some Variables required in the macro */
557   Mat        A                 = aij->A;
558   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
559   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
560   MatScalar  *aa               = a->a;
561   PetscBool  ignorezeroentries = a->ignorezeroentries;
562   Mat        B                 = aij->B;
563   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
564   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
565   MatScalar  *ba               = b->a;
566 
567   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
568   PetscInt  nonew;
569   MatScalar *ap1,*ap2;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (im[i] < 0) continue;
574 #if defined(PETSC_USE_DEBUG)
575     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
576 #endif
577     if (im[i] >= rstart && im[i] < rend) {
578       row      = im[i] - rstart;
579       lastcol1 = -1;
580       rp1      = aj + ai[row];
581       ap1      = aa + ai[row];
582       rmax1    = aimax[row];
583       nrow1    = ailen[row];
584       low1     = 0;
585       high1    = nrow1;
586       lastcol2 = -1;
587       rp2      = bj + bi[row];
588       ap2      = ba + bi[row];
589       rmax2    = bimax[row];
590       nrow2    = bilen[row];
591       low2     = 0;
592       high2    = nrow2;
593 
594       for (j=0; j<n; j++) {
595         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
596         if (in[j] >= cstart && in[j] < cend) {
597           col   = in[j] - cstart;
598           nonew = a->nonew;
599           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
600           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
601         } else if (in[j] < 0) continue;
602 #if defined(PETSC_USE_DEBUG)
603         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
604 #endif
605         else {
606           if (mat->was_assembled) {
607             if (!aij->colmap) {
608               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
609             }
610 #if defined(PETSC_USE_CTABLE)
611             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
612             col--;
613 #else
614             col = aij->colmap[in[j]] - 1;
615 #endif
616             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
617               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
618               col  =  in[j];
619               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
620               B     = aij->B;
621               b     = (Mat_SeqAIJ*)B->data;
622               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
623               rp2   = bj + bi[row];
624               ap2   = ba + bi[row];
625               rmax2 = bimax[row];
626               nrow2 = bilen[row];
627               low2  = 0;
628               high2 = nrow2;
629               bm    = aij->B->rmap->n;
630               ba    = b->a;
631             } else if (col < 0) {
632               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
633                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
634               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
635             }
636           } else col = in[j];
637           nonew = b->nonew;
638           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
639         }
640       }
641     } else {
642       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
643       if (!aij->donotstash) {
644         mat->assembled = PETSC_FALSE;
645         if (roworiented) {
646           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
647         } else {
648           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
649         }
650       }
651     }
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
660 */
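/*
   Worked example (illustrative only): with column ownership cstart=10 and cend=20, a row whose
   slice of mat_j is {3, 12, 15, 25} is split by the routine below as

     diagonal block A:     aj gets local columns {2, 5}   (12-cstart, 15-cstart), ailen[row] = 2
     off-diagonal block B: bj gets global columns {3, 25},                        bilen[row] = 2

   The off-diagonal column indices are left global here; they are compacted to local indices
   against garray later, during assembly.
*/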
661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
662 {
663   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
664   Mat            A           = aij->A; /* diagonal part of the matrix */
665   Mat            B           = aij->B; /* offdiagonal part of the matrix */
666   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
667   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
668   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
669   PetscInt       *ailen      = a->ilen,*aj = a->j;
670   PetscInt       *bilen      = b->ilen,*bj = b->j;
671   PetscInt       am          = aij->A->rmap->n,j;
672   PetscInt       diag_so_far = 0,dnz;
673   PetscInt       offd_so_far = 0,onz;
674 
675   PetscFunctionBegin;
676   /* Iterate over all rows of the matrix */
677   for (j=0; j<am; j++) {
678     dnz = onz = 0;
679     /*  Iterate over all non-zero columns of the current row */
680     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
681       /* If column is in the diagonal */
682       if (mat_j[col] >= cstart && mat_j[col] < cend) {
683         aj[diag_so_far++] = mat_j[col] - cstart;
684         dnz++;
685       } else { /* off-diagonal entries */
686         bj[offd_so_far++] = mat_j[col];
687         onz++;
688       }
689     }
690     ailen[j] = dnz;
691     bilen[j] = onz;
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 /*
697     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
698     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
699     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
700     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
701     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
702 */
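/*
   Continuing the example above (illustrative only): for the same row, with the matching slice of
   mat_a being {a0, a1, a2, a3}, this numerical variant additionally copies

     aa[rowstart_diag..] = {a1, a2}   and   ba[rowstart_offd..] = {a0, a3}.
*/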
703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
704 {
705   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
706   Mat            A      = aij->A; /* diagonal part of the matrix */
707   Mat            B      = aij->B; /* offdiagonal part of the matrix */
708   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
709   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
711   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
712   PetscInt       *ailen = a->ilen,*aj = a->j;
713   PetscInt       *bilen = b->ilen,*bj = b->j;
714   PetscInt       am     = aij->A->rmap->n,j;
715   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
716   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
717   PetscScalar    *aa = a->a,*ba = b->a;
718 
719   PetscFunctionBegin;
720   /* Iterate over all rows of the matrix */
721   for (j=0; j<am; j++) {
722     dnz_row = onz_row = 0;
723     rowstart_offd = full_offd_i[j];
724     rowstart_diag = full_diag_i[j];
725     /*  Iterate over all non-zero columns of the current row */
726     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
727       /* If column is in the diagonal */
728       if (mat_j[col] >= cstart && mat_j[col] < cend) {
729         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
730         aa[rowstart_diag+dnz_row] = mat_a[col];
731         dnz_row++;
732       } else { /* off-diagonal entries */
733         bj[rowstart_offd+onz_row] = mat_j[col];
734         ba[rowstart_offd+onz_row] = mat_a[col];
735         onz_row++;
736       }
737     }
738     ailen[j] = dnz_row;
739     bilen[j] = onz_row;
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
745 {
746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
749   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
750 
751   PetscFunctionBegin;
752   for (i=0; i<m; i++) {
753     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
754     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
755     if (idxm[i] >= rstart && idxm[i] < rend) {
756       row = idxm[i] - rstart;
757       for (j=0; j<n; j++) {
758         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
759         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
760         if (idxn[j] >= cstart && idxn[j] < cend) {
761           col  = idxn[j] - cstart;
762           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
763         } else {
764           if (!aij->colmap) {
765             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
766           }
767 #if defined(PETSC_USE_CTABLE)
768           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
769           col--;
770 #else
771           col = aij->colmap[idxn[j]] - 1;
772 #endif
773           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
774           else {
775             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776           }
777         }
778       }
779     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
785 
786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
787 {
788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
789   PetscErrorCode ierr;
790   PetscInt       nstash,reallocs;
791 
792   PetscFunctionBegin;
793   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
794 
795   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
796   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
797   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
798   PetscFunctionReturn(0);
799 }
800 
801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
804   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
805   PetscErrorCode ierr;
806   PetscMPIInt    n;
807   PetscInt       i,j,rstart,ncols,flg;
808   PetscInt       *row,*col;
809   PetscBool      other_disassembled;
810   PetscScalar    *val;
811 
812   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
813 
814   PetscFunctionBegin;
815   if (!aij->donotstash && !mat->nooffprocentries) {
816     while (1) {
817       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
818       if (!flg) break;
819 
820       for (i=0; i<n; ) {
821         /* Now identify the consecutive vals belonging to the same row */
822         for (j=i,rstart=row[j]; j<n; j++) {
823           if (row[j] != rstart) break;
824         }
825         if (j < n) ncols = j-i;
826         else       ncols = n-i;
827         /* Now assemble all these values with a single function call */
828         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
829 
830         i = j;
831       }
832     }
833     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
834   }
835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
836   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
837 #endif
838   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
839   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
840 
841   /* determine if any processor has disassembled; if so, we must
842      also disassemble ourselves, in order that we may reassemble. */
843   /*
844      if the nonzero structure of submatrix B cannot change, then we know that
845      no processor disassembled, and thus we can skip this stuff
846   */
847   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
848     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
849     if (mat->was_assembled && !other_disassembled) {
850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
851       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
852 #endif
853       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
854     }
855   }
856   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
857     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
858   }
859   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
861   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
862 #endif
863   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
865 
866   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
867 
868   aij->rowvalues = 0;
869 
870   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
871   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
872 
873   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
874   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
875     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
876     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
877   }
878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
879   mat->offloadmask = PETSC_OFFLOAD_BOTH;
880 #endif
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
885 {
886   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
891   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
896 {
897   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
898   PetscObjectState sA, sB;
899   PetscInt        *lrows;
900   PetscInt         r, len;
901   PetscBool        cong, lch, gch;
902   PetscErrorCode   ierr;
903 
904   PetscFunctionBegin;
905   /* get locally owned rows */
906   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
907   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
908   /* fix right hand side if needed */
909   if (x && b) {
910     const PetscScalar *xx;
911     PetscScalar       *bb;
912 
913     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
914     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
915     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
916     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
917     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
918     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
919   }
920 
921   sA = mat->A->nonzerostate;
922   sB = mat->B->nonzerostate;
923 
924   if (diag != 0.0 && cong) {
925     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
926     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
927   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
928     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
929     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
930     PetscInt   nnwA, nnwB;
931     PetscBool  nnzA, nnzB;
932 
933     nnwA = aijA->nonew;
934     nnwB = aijB->nonew;
935     nnzA = aijA->keepnonzeropattern;
936     nnzB = aijB->keepnonzeropattern;
937     if (!nnzA) {
938       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
939       aijA->nonew = 0;
940     }
941     if (!nnzB) {
942       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
943       aijB->nonew = 0;
944     }
945     /* Must zero here before the next loop */
946     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
947     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
948     for (r = 0; r < len; ++r) {
949       const PetscInt row = lrows[r] + A->rmap->rstart;
950       if (row >= A->cmap->N) continue;
951       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
952     }
953     aijA->nonew = nnwA;
954     aijB->nonew = nnwB;
955   } else {
956     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
957     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958   }
959   ierr = PetscFree(lrows);CHKERRQ(ierr);
960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962 
963   /* reduce nonzerostate */
964   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
965   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
966   if (gch) A->nonzerostate++;
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
971 {
972   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode    ierr;
974   PetscMPIInt       n = A->rmap->n;
975   PetscInt          i,j,r,m,p = 0,len = 0;
976   PetscInt          *lrows,*owners = A->rmap->range;
977   PetscSFNode       *rrows;
978   PetscSF           sf;
979   const PetscScalar *xx;
980   PetscScalar       *bb,*mask;
981   Vec               xmask,lmask;
982   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
983   const PetscInt    *aj, *ii,*ridx;
984   PetscScalar       *aa;
985 
986   PetscFunctionBegin;
987   /* Create SF where leaves are input rows and roots are owned rows */
988   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
989   for (r = 0; r < n; ++r) lrows[r] = -1;
990   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
991   for (r = 0; r < N; ++r) {
992     const PetscInt idx   = rows[r];
993     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
994     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
995       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
996     }
997     rrows[r].rank  = p;
998     rrows[r].index = rows[r] - owners[p];
999   }
1000   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1001   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1002   /* Collect flags for rows to be zeroed */
1003   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1004   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1005   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1006   /* Compress and put in row numbers */
1007   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1008   /* zero diagonal part of matrix */
1009   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1010   /* handle off diagonal part of matrix */
1011   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1012   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1013   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1014   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1015   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1016   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1019   if (x && b) { /* this code is buggy when the row and column layout don't match */
1020     PetscBool cong;
1021 
1022     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1023     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1024     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1027     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1028   }
1029   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1030   /* remove zeroed rows of off diagonal matrix */
1031   ii = aij->i;
1032   for (i=0; i<len; i++) {
1033     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1034   }
1035   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1036   if (aij->compressedrow.use) {
1037     m    = aij->compressedrow.nrows;
1038     ii   = aij->compressedrow.i;
1039     ridx = aij->compressedrow.rindex;
1040     for (i=0; i<m; i++) {
1041       n  = ii[i+1] - ii[i];
1042       aj = aij->j + ii[i];
1043       aa = aij->a + ii[i];
1044 
1045       for (j=0; j<n; j++) {
1046         if (PetscAbsScalar(mask[*aj])) {
1047           if (b) bb[*ridx] -= *aa*xx[*aj];
1048           *aa = 0.0;
1049         }
1050         aa++;
1051         aj++;
1052       }
1053       ridx++;
1054     }
1055   } else { /* do not use compressed row format */
1056     m = l->B->rmap->n;
1057     for (i=0; i<m; i++) {
1058       n  = ii[i+1] - ii[i];
1059       aj = aij->j + ii[i];
1060       aa = aij->a + ii[i];
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[i] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069     }
1070   }
1071   if (x && b) {
1072     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1073     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1074   }
1075   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1076   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1077   ierr = PetscFree(lrows);CHKERRQ(ierr);
1078 
1079   /* only change matrix nonzero state if pattern was allowed to be changed */
1080   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1081     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1082     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       nt;
1092   VecScatter     Mvctx = a->Mvctx;
1093 
1094   PetscFunctionBegin;
1095   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1096   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1097 
1098   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1100   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1101   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1116 {
1117   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1118   PetscErrorCode ierr;
1119   VecScatter     Mvctx = a->Mvctx;
1120 
1121   PetscFunctionBegin;
1122   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1123   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1124   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1125   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   /* do nondiagonal part */
1137   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1138   /* do local part */
1139   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1140   /* add partial results together */
1141   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1147 {
1148   MPI_Comm       comm;
1149   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1150   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1151   IS             Me,Notme;
1152   PetscErrorCode ierr;
1153   PetscInt       M,N,first,last,*notme,i;
1154   PetscBool      lf;
1155   PetscMPIInt    size;
1156 
1157   PetscFunctionBegin;
1158   /* Easy test: symmetric diagonal block */
1159   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1160   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1161   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1162   if (!*f) PetscFunctionReturn(0);
1163   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1164   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1165   if (size == 1) PetscFunctionReturn(0);
1166 
1167   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1168   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1169   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1170   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1171   for (i=0; i<first; i++) notme[i] = i;
1172   for (i=last; i<M; i++) notme[i-last+first] = i;
1173   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1174   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1175   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1176   Aoff = Aoffs[0];
1177   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1178   Boff = Boffs[0];
1179   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1180   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1182   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1184   ierr = PetscFree(notme);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1189 {
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   /* do nondiagonal part */
1204   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1205   /* do local part */
1206   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1207   /* add partial results together */
1208   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1218 {
1219   PetscErrorCode ierr;
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221 
1222   PetscFunctionBegin;
1223   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1224   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1225   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1241 {
1242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246 #if defined(PETSC_USE_LOG)
1247   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1248 #endif
1249   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1250   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1251   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1252   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1253 #if defined(PETSC_USE_CTABLE)
1254   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1255 #else
1256   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1257 #endif
1258   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1259   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1260   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1261   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1262   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1263   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1264   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1275 #if defined(PETSC_HAVE_ELEMENTAL)
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1277 #endif
1278 #if defined(PETSC_HAVE_HYPRE)
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1288 {
1289   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1290   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1291   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1292   PetscErrorCode ierr;
1293   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1294   int            fd;
1295   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1296   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1297   PetscScalar    *column_values;
1298   PetscInt       message_count,flowcontrolcount;
1299   FILE           *file;
1300 
1301   PetscFunctionBegin;
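  /* Layout of the binary file produced below: a 4-entry header
     (MAT_FILE_CLASSID, global rows, global columns, total nonzero count),
     followed by all row lengths, then all global column indices, then all
     numerical values; rank 0 gathers each block from the other ranks in turn
     and writes it in row order. */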
1302   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1304   nz   = A->nz + B->nz;
1305   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310 
1311     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen  = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1317   } else {
1318     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1319     rlen = mat->rmap->n;
1320   }
1321 
1322   /* load up the local row counts */
1323   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1324   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1339     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* rank 0 needs a buffer as large as the largest nz on any process; the other ranks only need space for their own nz */
1346   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1348   cnt   = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ((col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1355     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1356   }
1357   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1358 
1359   /* store the column indices to the file */
1360   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     MPI_Status status;
1363     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1364     for (i=1; i<size; i++) {
1365       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1366       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1367       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1368       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1370     }
1371     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1372   } else {
1373     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1374     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1375     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1376     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1379 
1380   /* load up the local column values */
1381   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1382   cnt  = 0;
1383   for (i=0; i<mat->rmap->n; i++) {
1384     for (j=B->i[i]; j<B->i[i+1]; j++) {
1385       if (garray[B->j[j]] > cstart) break;
1386       column_values[cnt++] = B->a[j];
1387     }
1388     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1389     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1390   }
1391   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1392 
1393   /* store the column values to the file */
1394   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1395   if (!rank) {
1396     MPI_Status status;
1397     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1398     for (i=1; i<size; i++) {
1399       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1400       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1401       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1402       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1403       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1404     }
1405     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1406   } else {
1407     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1408     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1409     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1411   }
1412   ierr = PetscFree(column_values);CHKERRQ(ierr);
1413 
1414   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1415   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #include <petscdraw.h>
1420 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1421 {
1422   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1423   PetscErrorCode    ierr;
1424   PetscMPIInt       rank = aij->rank,size = aij->size;
1425   PetscBool         isdraw,iascii,isbinary;
1426   PetscViewer       sviewer;
1427   PetscViewerFormat format;
1428 
1429   PetscFunctionBegin;
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1433   if (iascii) {
1434     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1435     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1436       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1437       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1438       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1439       for (i=0; i<(PetscInt)size; i++) {
1440         nmax = PetscMax(nmax,nz[i]);
1441         nmin = PetscMin(nmin,nz[i]);
1442         navg += nz[i];
1443       }
1444       ierr = PetscFree(nz);CHKERRQ(ierr);
1445       navg = navg/size;
1446       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1447       PetscFunctionReturn(0);
1448     }
1449     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1450     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1451       MatInfo   info;
1452       PetscBool inodes;
1453 
1454       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1455       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1458       if (!inodes) {
1459         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1460                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1461       } else {
1462         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1463                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1464       }
1465       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1467       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1468       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1469       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1470       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1471       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1472       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1475       PetscInt inodecount,inodelimit,*inodes;
1476       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1477       if (inodes) {
1478         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1479       } else {
1480         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1481       }
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1484       PetscFunctionReturn(0);
1485     }
1486   } else if (isbinary) {
1487     if (size == 1) {
1488       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1489       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1490     } else {
1491       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1492     }
1493     PetscFunctionReturn(0);
1494   } else if (iascii && size == 1) {
1495     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1496     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1497     PetscFunctionReturn(0);
1498   } else if (isdraw) {
1499     PetscDraw draw;
1500     PetscBool isnull;
1501     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1502     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1503     if (isnull) PetscFunctionReturn(0);
1504   }
1505 
1506   { /* assemble the entire matrix onto first processor */
1507     Mat A = NULL, Av;
1508     IS  isrow,iscol;
1509 
1510     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1511     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1512     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1513     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1514 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1515 /*
1516     Mat *AA, A = NULL, Av;
1517     IS  isrow,iscol;
1518 
1519     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1520     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1521     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1522     if (!rank) {
1523        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1524        A    = AA[0];
1525        Av   = AA[0];
1526     }
1527     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1528 */
1529     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1530     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1531     /*
1532        Everyone has to call to draw the matrix since the graphics waits are
1533        synchronized across all processors that share the PetscDraw object
1534     */
1535     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1536     if (!rank) {
1537       if (((PetscObject)mat)->name) {
1538         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1539       }
1540       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1541     }
1542     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1543     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1544     ierr = MatDestroy(&A);CHKERRQ(ierr);
1545   }
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1550 {
1551   PetscErrorCode ierr;
1552   PetscBool      iascii,isdraw,issocket,isbinary;
1553 
1554   PetscFunctionBegin;
1555   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1556   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1557   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1558   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1559   if (iascii || isdraw || isbinary || issocket) {
1560     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1566 {
1567   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1568   PetscErrorCode ierr;
1569   Vec            bb1 = 0;
1570   PetscBool      hasop;
1571 
1572   PetscFunctionBegin;
1573   if (flag == SOR_APPLY_UPPER) {
1574     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1575     PetscFunctionReturn(0);
1576   }
1577 
1578   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1579     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1580   }
1581 
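  /* The "local" sweeps below behave like a block Jacobi iteration across processes:
     each outer iteration scatters the current solution into the ghost vector lvec,
     moves the off-process contribution to the right-hand side as bb1 = bb - B*lvec,
     and then applies SOR to the local diagonal block A with bb1 as the right-hand side. */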
1582   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1583     if (flag & SOR_ZERO_INITIAL_GUESS) {
1584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1585       its--;
1586     }
1587 
1588     while (its--) {
1589       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1590       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1591 
1592       /* update rhs: bb1 = bb - B*x */
1593       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1594       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1595 
1596       /* local sweep */
1597       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1598     }
1599   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1600     if (flag & SOR_ZERO_INITIAL_GUESS) {
1601       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602       its--;
1603     }
1604     while (its--) {
1605       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607 
1608       /* update rhs: bb1 = bb - B*x */
1609       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1610       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1611 
1612       /* local sweep */
1613       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1614     }
1615   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1616     if (flag & SOR_ZERO_INITIAL_GUESS) {
1617       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1618       its--;
1619     }
1620     while (its--) {
1621       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1623 
1624       /* update rhs: bb1 = bb - B*x */
1625       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1626       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1627 
1628       /* local sweep */
1629       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1630     }
1631   } else if (flag & SOR_EISENSTAT) {
1632     Vec xx1;
1633 
1634     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1635     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1636 
1637     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1638     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639     if (!mat->diag) {
1640       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1641       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1642     }
1643     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1644     if (hasop) {
1645       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1646     } else {
1647       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1648     }
1649     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1650 
1651     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1652 
1653     /* local sweep */
1654     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1655     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1656     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1657   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1658 
1659   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1660 
1661   matin->factorerrortype = mat->A->factorerrortype;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1666 {
1667   Mat            aA,aB,Aperm;
1668   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1669   PetscScalar    *aa,*ba;
1670   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1671   PetscSF        rowsf,sf;
1672   IS             parcolp = NULL;
1673   PetscBool      done;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1678   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1679   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1680   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1681 
1682   /* Invert row permutation to find out where my rows should go */
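  /* The star forest has one leaf per local row, pointing at root rwant[i] in A's row
     layout; reducing work[i] = rstart+i onto the roots therefore delivers, to the owner
     of each original row, the global row index that row should occupy in the permuted
     matrix (stored in rdest). */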
1683   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1684   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1685   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1686   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1687   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1688   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1689 
1690   /* Invert column permutation to find out where my columns should go */
1691   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1692   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1693   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1694   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1695   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1696   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1697   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1698 
1699   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1700   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1701   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1702 
1703   /* Find out where my gcols should go */
1704   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1705   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1706   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1707   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1708   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1709   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1710   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1711   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1712 
1713   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1714   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1715   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1716   for (i=0; i<m; i++) {
1717     PetscInt row = rdest[i],rowner;
1718     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1719     for (j=ai[i]; j<ai[i+1]; j++) {
1720       PetscInt cowner,col = cdest[aj[j]];
1721       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1722       if (rowner == cowner) dnnz[i]++;
1723       else onnz[i]++;
1724     }
1725     for (j=bi[i]; j<bi[i+1]; j++) {
1726       PetscInt cowner,col = gcdest[bj[j]];
1727       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1728       if (rowner == cowner) dnnz[i]++;
1729       else onnz[i]++;
1730     }
1731   }
1732   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1733   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1734   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1735   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1737 
1738   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1739   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1740   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1741   for (i=0; i<m; i++) {
1742     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1743     PetscInt j0,rowlen;
1744     rowlen = ai[i+1] - ai[i];
1745     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of local rows m (the size of the scratch arrays), so insert values in batches of at most m */
1746       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1747       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1748     }
1749     rowlen = bi[i+1] - bi[i];
1750     for (j0=j=0; j<rowlen; j0=j) {
1751       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1752       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1753     }
1754   }
1755   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1757   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1758   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1759   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1760   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1761   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1762   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1763   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1764   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1765   *B = Aperm;
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1770 {
1771   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1772   PetscErrorCode ierr;
1773 
1774   PetscFunctionBegin;
1775   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1776   if (ghosts) *ghosts = aij->garray;
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1781 {
1782   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1783   Mat            A    = mat->A,B = mat->B;
1784   PetscErrorCode ierr;
1785   PetscLogDouble isend[5],irecv[5];
1786 
1787   PetscFunctionBegin;
1788   info->block_size = 1.0;
1789   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1790 
1791   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1792   isend[3] = info->memory;  isend[4] = info->mallocs;
1793 
1794   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1795 
1796   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1797   isend[3] += info->memory;  isend[4] += info->mallocs;
1798   if (flag == MAT_LOCAL) {
1799     info->nz_used      = isend[0];
1800     info->nz_allocated = isend[1];
1801     info->nz_unneeded  = isend[2];
1802     info->memory       = isend[3];
1803     info->mallocs      = isend[4];
1804   } else if (flag == MAT_GLOBAL_MAX) {
1805     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1806 
1807     info->nz_used      = irecv[0];
1808     info->nz_allocated = irecv[1];
1809     info->nz_unneeded  = irecv[2];
1810     info->memory       = irecv[3];
1811     info->mallocs      = irecv[4];
1812   } else if (flag == MAT_GLOBAL_SUM) {
1813     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1814 
1815     info->nz_used      = irecv[0];
1816     info->nz_allocated = irecv[1];
1817     info->nz_unneeded  = irecv[2];
1818     info->memory       = irecv[3];
1819     info->mallocs      = irecv[4];
1820   }
1821   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1822   info->fill_ratio_needed = 0;
1823   info->factor_mallocs    = 0;
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1828 {
1829   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1830   PetscErrorCode ierr;
1831 
1832   PetscFunctionBegin;
1833   switch (op) {
1834   case MAT_NEW_NONZERO_LOCATIONS:
1835   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1836   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1837   case MAT_KEEP_NONZERO_PATTERN:
1838   case MAT_NEW_NONZERO_LOCATION_ERR:
1839   case MAT_USE_INODES:
1840   case MAT_IGNORE_ZERO_ENTRIES:
1841     MatCheckPreallocated(A,1);
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1844     break;
1845   case MAT_ROW_ORIENTED:
1846     MatCheckPreallocated(A,1);
1847     a->roworiented = flg;
1848 
1849     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1850     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1851     break;
1852   case MAT_NEW_DIAGONALS:
1853   case MAT_SORTED_FULL:
1854     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1855     break;
1856   case MAT_IGNORE_OFF_PROC_ENTRIES:
1857     a->donotstash = flg;
1858     break;
1859   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1860   case MAT_SPD:
1861   case MAT_SYMMETRIC:
1862   case MAT_STRUCTURALLY_SYMMETRIC:
1863   case MAT_HERMITIAN:
1864   case MAT_SYMMETRY_ETERNAL:
1865     break;
1866   case MAT_SUBMAT_SINGLEIS:
1867     A->submat_singleis = flg;
1868     break;
1869   case MAT_STRUCTURE_ONLY:
1870     /* The option is handled directly by MatSetOption() */
1871     break;
1872   default:
1873     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1879 {
1880   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1881   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1882   PetscErrorCode ierr;
1883   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1884   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1885   PetscInt       *cmap,*idx_p;
1886 
1887   PetscFunctionBegin;
1888   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1889   mat->getrowactive = PETSC_TRUE;
1890 
1891   if (!mat->rowvalues && (idx || v)) {
1892     /*
1893         allocate enough space to hold information from the longest row.
1894     */
1895     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1896     PetscInt   max = 1,tmp;
1897     for (i=0; i<matin->rmap->n; i++) {
1898       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1899       if (max < tmp) max = tmp;
1900     }
1901     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1902   }
1903 
1904   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1905   lrow = row - rstart;
1906 
1907   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1908   if (!v)   {pvA = 0; pvB = 0;}
1909   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1910   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1911   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1912   nztot = nzA + nzB;
1913 
1914   cmap = mat->garray;
1915   if (v  || idx) {
1916     if (nztot) {
1917       /* Sort by increasing column numbers, assuming A and B already sorted */
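      /* The merged row is assembled in three segments: off-diagonal entries whose global
         column is below cstart, then the diagonal-block entries (columns cstart and above,
         local to this process), then the remaining off-diagonal entries, so the global
         column indices come out in increasing order. */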
1918       PetscInt imark = -1;
1919       if (v) {
1920         *v = v_p = mat->rowvalues;
1921         for (i=0; i<nzB; i++) {
1922           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1923           else break;
1924         }
1925         imark = i;
1926         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1927         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1928       }
1929       if (idx) {
1930         *idx = idx_p = mat->rowindices;
1931         if (imark > -1) {
1932           for (i=0; i<imark; i++) {
1933             idx_p[i] = cmap[cworkB[i]];
1934           }
1935         } else {
1936           for (i=0; i<nzB; i++) {
1937             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1938             else break;
1939           }
1940           imark = i;
1941         }
1942         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1943         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1944       }
1945     } else {
1946       if (idx) *idx = 0;
1947       if (v)   *v   = 0;
1948     }
1949   }
1950   *nz  = nztot;
1951   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1957 {
1958   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1959 
1960   PetscFunctionBegin;
1961   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1962   aij->getrowactive = PETSC_FALSE;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1967 {
1968   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1969   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1970   PetscErrorCode ierr;
1971   PetscInt       i,j,cstart = mat->cmap->rstart;
1972   PetscReal      sum = 0.0;
1973   MatScalar      *v;
1974 
1975   PetscFunctionBegin;
1976   if (aij->size == 1) {
1977     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1978   } else {
1979     if (type == NORM_FROBENIUS) {
1980       v = amat->a;
1981       for (i=0; i<amat->nz; i++) {
1982         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1983       }
1984       v = bmat->a;
1985       for (i=0; i<bmat->nz; i++) {
1986         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1987       }
1988       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1989       *norm = PetscSqrtReal(*norm);
1990       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1991     } else if (type == NORM_1) { /* max column norm */
1992       PetscReal *tmp,*tmp2;
1993       PetscInt  *jj,*garray = aij->garray;
1994       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1995       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1996       *norm = 0.0;
1997       v     = amat->a; jj = amat->j;
1998       for (j=0; j<amat->nz; j++) {
1999         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2000       }
2001       v = bmat->a; jj = bmat->j;
2002       for (j=0; j<bmat->nz; j++) {
2003         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2004       }
2005       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2006       for (j=0; j<mat->cmap->N; j++) {
2007         if (tmp2[j] > *norm) *norm = tmp2[j];
2008       }
2009       ierr = PetscFree(tmp);CHKERRQ(ierr);
2010       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2011       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2012     } else if (type == NORM_INFINITY) { /* max row norm */
2013       PetscReal ntemp = 0.0;
2014       for (j=0; j<aij->A->rmap->n; j++) {
2015         v   = amat->a + amat->i[j];
2016         sum = 0.0;
2017         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2018           sum += PetscAbsScalar(*v); v++;
2019         }
2020         v = bmat->a + bmat->i[j];
2021         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2022           sum += PetscAbsScalar(*v); v++;
2023         }
2024         if (sum > ntemp) ntemp = sum;
2025       }
2026       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2027       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2028     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2029   }
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2034 {
2035   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2036   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2037   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2038   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2039   PetscErrorCode  ierr;
2040   Mat             B,A_diag,*B_diag;
2041   const MatScalar *array;
2042 
2043   PetscFunctionBegin;
2044   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2045   ai = Aloc->i; aj = Aloc->j;
2046   bi = Bloc->i; bj = Bloc->j;
2047   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2048     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2049     PetscSFNode          *oloc;
2050     PETSC_UNUSED PetscSF sf;
2051 
2052     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2053     /* compute d_nnz for preallocation */
2054     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2055     for (i=0; i<ai[ma]; i++) {
2056       d_nnz[aj[i]]++;
2057     }
2058     /* compute local off-diagonal contributions */
2059     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2060     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2061     /* map those to global */
2062     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2063     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2064     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2065     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2066     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2067     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2068     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2069 
2070     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2071     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2072     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2073     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2074     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2075     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2076   } else {
2077     B    = *matout;
2078     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2079   }
2080 
2081   b           = (Mat_MPIAIJ*)B->data;
2082   A_diag      = a->A;
2083   B_diag      = &b->A;
2084   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2085   A_diag_ncol = A_diag->cmap->N;
2086   B_diag_ilen = sub_B_diag->ilen;
2087   B_diag_i    = sub_B_diag->i;
2088 
2089   /* Set ilen for diagonal of B */
2090   for (i=0; i<A_diag_ncol; i++) {
2091     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2092   }
2093 
2094   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2095   very quickly (i.e., without using MatSetValues()), because all writes are local. */
2096   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2097 
2098   /* copy over the B part */
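  /* Entry (i, garray[bj[.]]) of the off-diagonal block becomes entry
     (garray[bj[.]], rstart+i) of the transpose: MatSetValues() below is called with
     ncol row indices and a single column index, which transposes the off-process part
     one local row at a time. */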
2099   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2100   array = Bloc->a;
2101   row   = A->rmap->rstart;
2102   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2103   cols_tmp = cols;
2104   for (i=0; i<mb; i++) {
2105     ncol = bi[i+1]-bi[i];
2106     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2107     row++;
2108     array += ncol; cols_tmp += ncol;
2109   }
2110   ierr = PetscFree(cols);CHKERRQ(ierr);
2111 
2112   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2113   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2115     *matout = B;
2116   } else {
2117     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2123 {
2124   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2125   Mat            a    = aij->A,b = aij->B;
2126   PetscErrorCode ierr;
2127   PetscInt       s1,s2,s3;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2131   if (rr) {
2132     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2133     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2134     /* Overlap communication with computation. */
2135     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2136   }
2137   if (ll) {
2138     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2139     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2140     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2141   }
2142   /* scale the diagonal block */
2143   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2144 
2145   if (rr) {
2146     /* Do a scatter end and then right scale the off-diagonal block */
2147     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2148     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2154 {
2155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2164 {
2165   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2166   Mat            a,b,c,d;
2167   PetscBool      flg;
2168   PetscErrorCode ierr;
2169 
2170   PetscFunctionBegin;
2171   a = matA->A; b = matA->B;
2172   c = matB->A; d = matB->B;
2173 
2174   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2175   if (flg) {
2176     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2177   }
2178   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2183 {
2184   PetscErrorCode ierr;
2185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2186   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2187 
2188   PetscFunctionBegin;
2189   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2190   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2191     /* Because of the column compression in the off-process part of the matrix a->B,
2192        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2193        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2194        could be provided by first uncompressing the a->B matrices and then copying the
2195        submatrices. */
2196     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2197   } else {
2198     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2199     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2200   }
2201   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2206 {
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 /*
2215    Computes the number of nonzeros per row needed for preallocation when X and Y
2216    have different nonzero structure.
2217 */
2218 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2219 {
2220   PetscInt       i,j,k,nzx,nzy;
2221 
2222   PetscFunctionBegin;
2223   /* Set the number of nonzeros in the new matrix */
2224   for (i=0; i<m; i++) {
2225     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2226     nzx = xi[i+1] - xi[i];
2227     nzy = yi[i+1] - yi[i];
2228     nnz[i] = 0;
2229     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2230       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2231       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2232       nnz[i]++;
2233     }
2234     for (; k<nzy; k++) nnz[i]++;
2235   }
2236   PetscFunctionReturn(0);
2237 }
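/*
   Worked example of the counting loop above (illustrative only): if row i of X has
   global columns {0, 5, 7} and row i of Y has global columns {1, 5}, the merge counts
   0 (from X), 1 (caught up from Y), 5 (shared, counted once), and 7 (from X), with no
   trailing Y columns left, giving nnz[i] = 4 = |{0, 1, 5, 7}|.
*/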
2238 
2239 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2240 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2241 {
2242   PetscErrorCode ierr;
2243   PetscInt       m = Y->rmap->N;
2244   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2245   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2253 {
2254   PetscErrorCode ierr;
2255   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2256   PetscBLASInt   bnz,one=1;
2257   Mat_SeqAIJ     *x,*y;
2258 
2259   PetscFunctionBegin;
2260   if (str == SAME_NONZERO_PATTERN) {
2261     PetscScalar alpha = a;
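    /* With identical nonzero patterns the value arrays of the diagonal and off-diagonal
       blocks of X and Y line up entry for entry, so Y += a*X reduces to two raw BLAS
       axpy calls on those arrays. */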
2262     x    = (Mat_SeqAIJ*)xx->A->data;
2263     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2264     y    = (Mat_SeqAIJ*)yy->A->data;
2265     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2266     x    = (Mat_SeqAIJ*)xx->B->data;
2267     y    = (Mat_SeqAIJ*)yy->B->data;
2268     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2269     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2270     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2271     /* the MatAXPY_Basic* subroutines call MatAssembly(), which keeps the GPU copy in sync;
2272        here the host arrays were modified directly, so mark the valid copy as being on the CPU */
2273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2274     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2275       Y->offloadmask = PETSC_OFFLOAD_CPU;
2276     }
2277 #endif
2278   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2279     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2280   } else {
2281     Mat      B;
2282     PetscInt *nnz_d,*nnz_o;
2283     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2284     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2285     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2286     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2287     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2288     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2289     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2290     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2291     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2292     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2293     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2294     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2295     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2296     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2297   }
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2302 
2303 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2304 {
2305 #if defined(PETSC_USE_COMPLEX)
2306   PetscErrorCode ierr;
2307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2311   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2312 #else
2313   PetscFunctionBegin;
2314 #endif
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2319 {
2320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2321   PetscErrorCode ierr;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2325   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2330 {
2331   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2336   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2341 {
2342   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2343   PetscErrorCode ierr;
2344   PetscInt       i,*idxb = 0;
2345   PetscScalar    *va,*vb;
2346   Vec            vtmp;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351   if (idx) {
2352     for (i=0; i<A->rmap->n; i++) {
2353       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2354     }
2355   }
2356 
2357   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2358   if (idx) {
2359     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2360   }
2361   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2362   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2363 
2364   for (i=0; i<A->rmap->n; i++) {
2365     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2366       va[i] = vb[i];
2367       if (idx) idx[i] = a->garray[idxb[i]];
2368     }
2369   }
2370 
2371   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2372   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2373   ierr = PetscFree(idxb);CHKERRQ(ierr);
2374   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2379 {
2380   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2381   PetscErrorCode ierr;
2382   PetscInt       i,*idxb = 0;
2383   PetscScalar    *va,*vb;
2384   Vec            vtmp;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2388   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2389   if (idx) {
2390     for (i=0; i<A->rmap->n; i++) { /* loop over local rows: v and idx have rmap->n entries */
2391       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2392     }
2393   }
2394 
2395   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2396   if (idx) {
2397     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2398   }
2399   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2400   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2401 
2402   for (i=0; i<A->rmap->n; i++) {
2403     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2404       va[i] = vb[i];
2405       if (idx) idx[i] = a->garray[idxb[i]];
2406     }
2407   }
2408 
2409   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2410   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2411   ierr = PetscFree(idxb);CHKERRQ(ierr);
2412   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2417 {
2418   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2419   PetscInt       n      = A->rmap->n;
2420   PetscInt       cstart = A->cmap->rstart;
2421   PetscInt       *cmap  = mat->garray;
2422   PetscInt       *diagIdx, *offdiagIdx;
2423   Vec            diagV, offdiagV;
2424   PetscScalar    *a, *diagA, *offdiagA;
2425   PetscInt       r;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2430   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2431   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2432   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2433   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2435   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2436   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2437   for (r = 0; r < n; ++r) {
2438     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2439       a[r]   = diagA[r];
2440       idx[r] = cstart + diagIdx[r];
2441     } else {
2442       a[r]   = offdiagA[r];
2443       idx[r] = cmap[offdiagIdx[r]];
2444     }
2445   }
2446   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2447   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2448   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2450   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2451   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2456 {
2457   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2458   PetscInt       n      = A->rmap->n;
2459   PetscInt       cstart = A->cmap->rstart;
2460   PetscInt       *cmap  = mat->garray;
2461   PetscInt       *diagIdx, *offdiagIdx;
2462   Vec            diagV, offdiagV;
2463   PetscScalar    *a, *diagA, *offdiagA;
2464   PetscInt       r;
2465   PetscErrorCode ierr;
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2469   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2470   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2471   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2472   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2473   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2474   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2475   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2476   for (r = 0; r < n; ++r) {
2477     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2478       a[r]   = diagA[r];
2479       idx[r] = cstart + diagIdx[r];
2480     } else {
2481       a[r]   = offdiagA[r];
2482       idx[r] = cmap[offdiagIdx[r]];
2483     }
2484   }
2485   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2486   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2487   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2488   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2489   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2490   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2495 {
2496   PetscErrorCode ierr;
2497   Mat            *dummy;
2498 
2499   PetscFunctionBegin;
2500   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2501   *newmat = *dummy;
2502   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2507 {
2508   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2509   PetscErrorCode ierr;
2510 
2511   PetscFunctionBegin;
2512   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2513   A->factorerrortype = a->A->factorerrortype;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2518 {
2519   PetscErrorCode ierr;
2520   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2521 
2522   PetscFunctionBegin;
2523   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2524   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2525   if (x->assembled) {
2526     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2527   } else {
2528     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2529   }
2530   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2536 {
2537   PetscFunctionBegin;
2538   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2539   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 /*@
2544    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2545 
2546    Collective on Mat
2547 
2548    Input Parameters:
2549 +    A - the matrix
2550 -    sc - PETSC_TRUE to use the scalable algorithm (the default is the non-scalable algorithm)
2551 
2552    Level: advanced
2553 
2554 @*/
2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2556 {
2557   PetscErrorCode       ierr;
2558 
2559   PetscFunctionBegin;
2560   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
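
/*
   Illustrative usage sketch (A, nsub, subdomains and ov are assumed to exist already; this is
   not part of the manual page above): select the scalable overlap algorithm on a MATMPIAIJ
   matrix before MatIncreaseOverlap() is called, e.g. from an additive Schwarz setup. Passing
   -mat_increase_overlap_scalable through MatSetFromOptions() has the same effect.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nsub,subdomains,ov);CHKERRQ(ierr);
*/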
2563 
2564 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2565 {
2566   PetscErrorCode       ierr;
2567   PetscBool            sc = PETSC_FALSE,flg;
2568 
2569   PetscFunctionBegin;
2570   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2571   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2572   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2573   if (flg) {
2574     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2575   }
2576   ierr = PetscOptionsTail();CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2581 {
2582   PetscErrorCode ierr;
2583   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2584   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2585 
2586   PetscFunctionBegin;
2587   if (!Y->preallocated) {
2588     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2589   } else if (!aij->nz) {
2590     PetscInt nonew = aij->nonew;
2591     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2592     aij->nonew = nonew;
2593   }
2594   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2599 {
2600   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2601   PetscErrorCode ierr;
2602 
2603   PetscFunctionBegin;
2604   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2605   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2606   if (d) {
2607     PetscInt rstart;
2608     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2609     *d += rstart;
2610 
2611   }
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2616 {
2617   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2618   PetscErrorCode ierr;
2619 
2620   PetscFunctionBegin;
2621   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 /* -------------------------------------------------------------------*/
2626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2627                                        MatGetRow_MPIAIJ,
2628                                        MatRestoreRow_MPIAIJ,
2629                                        MatMult_MPIAIJ,
2630                                 /* 4*/ MatMultAdd_MPIAIJ,
2631                                        MatMultTranspose_MPIAIJ,
2632                                        MatMultTransposeAdd_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*10*/ 0,
2637                                        0,
2638                                        0,
2639                                        MatSOR_MPIAIJ,
2640                                        MatTranspose_MPIAIJ,
2641                                 /*15*/ MatGetInfo_MPIAIJ,
2642                                        MatEqual_MPIAIJ,
2643                                        MatGetDiagonal_MPIAIJ,
2644                                        MatDiagonalScale_MPIAIJ,
2645                                        MatNorm_MPIAIJ,
2646                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2647                                        MatAssemblyEnd_MPIAIJ,
2648                                        MatSetOption_MPIAIJ,
2649                                        MatZeroEntries_MPIAIJ,
2650                                 /*24*/ MatZeroRows_MPIAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*29*/ MatSetUp_MPIAIJ,
2656                                        0,
2657                                        0,
2658                                        MatGetDiagonalBlock_MPIAIJ,
2659                                        0,
2660                                 /*34*/ MatDuplicate_MPIAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                 /*39*/ MatAXPY_MPIAIJ,
2666                                        MatCreateSubMatrices_MPIAIJ,
2667                                        MatIncreaseOverlap_MPIAIJ,
2668                                        MatGetValues_MPIAIJ,
2669                                        MatCopy_MPIAIJ,
2670                                 /*44*/ MatGetRowMax_MPIAIJ,
2671                                        MatScale_MPIAIJ,
2672                                        MatShift_MPIAIJ,
2673                                        MatDiagonalSet_MPIAIJ,
2674                                        MatZeroRowsColumns_MPIAIJ,
2675                                 /*49*/ MatSetRandom_MPIAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2681                                        0,
2682                                        MatSetUnfactored_MPIAIJ,
2683                                        MatPermute_MPIAIJ,
2684                                        0,
2685                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2686                                        MatDestroy_MPIAIJ,
2687                                        MatView_MPIAIJ,
2688                                        0,
2689                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2690                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2691                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2696                                        MatGetRowMinAbs_MPIAIJ,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                 /*75*/ MatFDColoringApply_AIJ,
2702                                        MatSetFromOptions_MPIAIJ,
2703                                        0,
2704                                        0,
2705                                        MatFindZeroDiagonals_MPIAIJ,
2706                                 /*80*/ 0,
2707                                        0,
2708                                        0,
2709                                 /*83*/ MatLoad_MPIAIJ,
2710                                        MatIsSymmetric_MPIAIJ,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2716                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2717                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2718                                        MatPtAP_MPIAIJ_MPIAIJ,
2719                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2720                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        MatPinToCPU_MPIAIJ,
2725                                 /*99*/ 0,
2726                                        0,
2727                                        0,
2728                                        MatConjugate_MPIAIJ,
2729                                        0,
2730                                 /*104*/MatSetValuesRow_MPIAIJ,
2731                                        MatRealPart_MPIAIJ,
2732                                        MatImaginaryPart_MPIAIJ,
2733                                        0,
2734                                        0,
2735                                 /*109*/0,
2736                                        0,
2737                                        MatGetRowMin_MPIAIJ,
2738                                        0,
2739                                        MatMissingDiagonal_MPIAIJ,
2740                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2741                                        0,
2742                                        MatGetGhosts_MPIAIJ,
2743                                        0,
2744                                        0,
2745                                 /*119*/0,
2746                                        0,
2747                                        0,
2748                                        0,
2749                                        MatGetMultiProcBlock_MPIAIJ,
2750                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2751                                        MatGetColumnNorms_MPIAIJ,
2752                                        MatInvertBlockDiagonal_MPIAIJ,
2753                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2754                                        MatCreateSubMatricesMPI_MPIAIJ,
2755                                 /*129*/0,
2756                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2757                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2758                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2759                                        0,
2760                                 /*134*/0,
2761                                        0,
2762                                        MatRARt_MPIAIJ_MPIAIJ,
2763                                        0,
2764                                        0,
2765                                 /*139*/MatSetBlockSizes_MPIAIJ,
2766                                        0,
2767                                        0,
2768                                        MatFDColoringSetUp_MPIXAIJ,
2769                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2770                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2771 };
2772 
2773 /* ----------------------------------------------------------------------------------------*/
2774 
2775 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2776 {
2777   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2782   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2787 {
2788   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2793   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2798 {
2799   Mat_MPIAIJ     *b;
2800   PetscErrorCode ierr;
2801   PetscMPIInt    size;
2802 
2803   PetscFunctionBegin;
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   b = (Mat_MPIAIJ*)B->data;
2807 
2808 #if defined(PETSC_USE_CTABLE)
2809   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2810 #else
2811   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2812 #endif
2813   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2814   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2815   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2816 
2817   /* Because the B matrix will have been resized we simply destroy it and create a new one each time */
2818   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2819   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2820   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2821   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2822   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2823   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2824   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2825 
2826   if (!B->preallocated) {
2827     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2828     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2829     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2830     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2831     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2832   }
2833 
2834   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2835   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2836   B->preallocated  = PETSC_TRUE;
2837   B->was_assembled = PETSC_FALSE;
2838   B->assembled     = PETSC_FALSE;
2839   PetscFunctionReturn(0);
2840 }
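
/*
   Illustrative usage sketch (a hypothetical caller, not code from this file): the public
   MatMPIAIJSetPreallocation() reaches MatMPIAIJSetPreallocation_MPIAIJ() above through
   PetscTryMethod(). Here a square matrix of global size N is preallocated with at most 5
   diagonal-block and 2 off-diagonal nonzeros per row; exact per-row counts could be supplied
   through the d_nnz/o_nnz arrays instead of NULL.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/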
2841 
2842 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2843 {
2844   Mat_MPIAIJ     *b;
2845   PetscErrorCode ierr;
2846 
2847   PetscFunctionBegin;
2848   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2849   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2850   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2851   b = (Mat_MPIAIJ*)B->data;
2852 
2853 #if defined(PETSC_USE_CTABLE)
2854   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2855 #else
2856   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2857 #endif
2858   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2859   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2860   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2861 
2862   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2863   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2864   B->preallocated  = PETSC_TRUE;
2865   B->was_assembled = PETSC_FALSE;
2866   B->assembled = PETSC_FALSE;
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2871 {
2872   Mat            mat;
2873   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2874   PetscErrorCode ierr;
2875 
2876   PetscFunctionBegin;
2877   *newmat = 0;
2878   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2879   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2880   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2881   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2882   a       = (Mat_MPIAIJ*)mat->data;
2883 
2884   mat->factortype   = matin->factortype;
2885   mat->assembled    = PETSC_TRUE;
2886   mat->insertmode   = NOT_SET_VALUES;
2887   mat->preallocated = PETSC_TRUE;
2888 
2889   a->size         = oldmat->size;
2890   a->rank         = oldmat->rank;
2891   a->donotstash   = oldmat->donotstash;
2892   a->roworiented  = oldmat->roworiented;
2893   a->rowindices   = 0;
2894   a->rowvalues    = 0;
2895   a->getrowactive = PETSC_FALSE;
2896 
2897   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2898   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2899 
2900   if (oldmat->colmap) {
2901 #if defined(PETSC_USE_CTABLE)
2902     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2903 #else
2904     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2905     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2906     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2907 #endif
2908   } else a->colmap = 0;
2909   if (oldmat->garray) {
2910     PetscInt len;
2911     len  = oldmat->B->cmap->n;
2912     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2913     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2914     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2915   } else a->garray = 0;
2916 
2917   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2918   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2919   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2920   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2921 
2922   if (oldmat->Mvctx_mpi1) {
2923     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2924     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2925   }
2926 
2927   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2928   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2929   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2930   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2931   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2932   *newmat = mat;
2933   PetscFunctionReturn(0);
2934 }
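
/*
   Illustrative usage sketch (A is assumed to be an assembled MATMPIAIJ matrix): MatDuplicate()
   reaches MatDuplicate_MPIAIJ() above through the operations table; MAT_DO_NOT_COPY_VALUES or
   MAT_SHARE_NONZERO_PATTERN may be used instead of MAT_COPY_VALUES.

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
     ierr = MatDestroy(&Acopy);CHKERRQ(ierr);
*/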
2935 
2936 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2937 {
2938   PetscBool      isbinary, ishdf5;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2943   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2944   /* force binary viewer to load .info file if it has not yet done so */
2945   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2946   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2948   if (isbinary) {
2949     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2950   } else if (ishdf5) {
2951 #if defined(PETSC_HAVE_HDF5)
2952     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2953 #else
2954     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2955 #endif
2956   } else {
2957     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2958   }
2959   PetscFunctionReturn(0);
2960 }
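
/*
   Illustrative usage sketch (the file name is an assumption): MatLoad() dispatches to
   MatLoad_MPIAIJ() above when the matrix type is MATMPIAIJ and the viewer is binary or HDF5.

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/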
2961 
2962 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer)
2963 {
2964   PetscScalar    *vals,*svals;
2965   MPI_Comm       comm;
2966   PetscErrorCode ierr;
2967   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2968   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2969   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2970   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2971   PetscInt       cend,cstart,n,*rowners;
2972   int            fd;
2973   PetscInt       bs = newMat->rmap->bs;
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2977   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2978   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2979   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2980   if (!rank) {
2981     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2982     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2983     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2984   }
2985 
2986   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2987   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2988   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2989   if (bs < 0) bs = 1;
2990 
2991   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2992   M    = header[1]; N = header[2];
2993 
2994   /* If global sizes are set, check if they are consistent with that given in the file */
2995   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2996   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2997 
2998   /* determine ownership of all (block) rows */
2999   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3000   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3001   else m = newMat->rmap->n; /* Set by user */
3002 
3003   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
3004   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3005 
3006   /* First process needs enough room for process with most rows */
3007   if (!rank) {
3008     mmax = rowners[1];
3009     for (i=2; i<=size; i++) {
3010       mmax = PetscMax(mmax, rowners[i]);
3011     }
3012   } else mmax = -1;             /* unused, but compilers complain */
3013 
3014   rowners[0] = 0;
3015   for (i=2; i<=size; i++) {
3016     rowners[i] += rowners[i-1];
3017   }
3018   rstart = rowners[rank];
3019   rend   = rowners[rank+1];
3020 
3021   /* distribute row lengths to all processors */
3022   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3023   if (!rank) {
3024     ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr);
3025     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3026     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3027     for (j=0; j<m; j++) {
3028       procsnz[0] += ourlens[j];
3029     }
3030     for (i=1; i<size; i++) {
3031       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr);
3032       /* calculate the number of nonzeros on each processor */
3033       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3034         procsnz[i] += rowlengths[j];
3035       }
3036       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3037     }
3038     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3039   } else {
3040     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3041   }
3042 
3043   if (!rank) {
3044     /* determine max buffer needed and allocate it */
3045     maxnz = 0;
3046     for (i=0; i<size; i++) {
3047       maxnz = PetscMax(maxnz,procsnz[i]);
3048     }
3049     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3050 
3051     /* read in my part of the matrix column indices  */
3052     nz   = procsnz[0];
3053     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3054     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3055 
3056     /* read in everyone else's and ship it off */
3057     for (i=1; i<size; i++) {
3058       nz   = procsnz[i];
3059       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3060       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3061     }
3062     ierr = PetscFree(cols);CHKERRQ(ierr);
3063   } else {
3064     /* determine buffer space needed for message */
3065     nz = 0;
3066     for (i=0; i<m; i++) {
3067       nz += ourlens[i];
3068     }
3069     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3070 
3071     /* receive message of column indices */
3072     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3073   }
3074 
3075   /* determine column ownership if matrix is not square */
3076   if (N != M) {
3077     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3078     else n = newMat->cmap->n;
3079     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3080     cstart = cend - n;
3081   } else {
3082     cstart = rstart;
3083     cend   = rend;
3084     n      = cend - cstart;
3085   }
3086 
3087   /* loop over local rows, determining number of off-diagonal entries */
3088   ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr);
3089   jj   = 0;
3090   for (i=0; i<m; i++) {
3091     for (j=0; j<ourlens[i]; j++) {
3092       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3093       jj++;
3094     }
3095   }
3096 
3097   for (i=0; i<m; i++) {
3098     ourlens[i] -= offlens[i];
3099   }
3100   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3101 
3102   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3103 
3104   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3105 
3106   for (i=0; i<m; i++) {
3107     ourlens[i] += offlens[i];
3108   }
3109 
3110   if (!rank) {
3111     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3112 
3113     /* read in my part of the matrix numerical values  */
3114     nz   = procsnz[0];
3115     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3116 
3117     /* insert into matrix */
3118     jj      = rstart;
3119     smycols = mycols;
3120     svals   = vals;
3121     for (i=0; i<m; i++) {
3122       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3123       smycols += ourlens[i];
3124       svals   += ourlens[i];
3125       jj++;
3126     }
3127 
3128     /* read in other processors and ship out */
3129     for (i=1; i<size; i++) {
3130       nz   = procsnz[i];
3131       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3132       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3133     }
3134     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3135   } else {
3136     /* receive numeric values */
3137     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3138 
3139     /* receive message of values */
3140     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3141 
3142     /* insert into matrix */
3143     jj      = rstart;
3144     smycols = mycols;
3145     svals   = vals;
3146     for (i=0; i<m; i++) {
3147       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3148       smycols += ourlens[i];
3149       svals   += ourlens[i];
3150       jj++;
3151     }
3152   }
3153   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3154   ierr = PetscFree(vals);CHKERRQ(ierr);
3155   ierr = PetscFree(mycols);CHKERRQ(ierr);
3156   ierr = PetscFree(rowners);CHKERRQ(ierr);
3157   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3158   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 /* Not scalable because of ISAllGather() unless getting all columns. */
3163 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3164 {
3165   PetscErrorCode ierr;
3166   IS             iscol_local;
3167   PetscBool      isstride;
3168   PetscMPIInt    lisstride=0,gisstride;
3169 
3170   PetscFunctionBegin;
3171   /* check if we are grabbing all columns */
3172   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3173 
3174   if (isstride) {
3175     PetscInt  start,len,mstart,mlen;
3176     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3177     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3178     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3179     if (mstart == start && mlen-mstart == len) lisstride = 1;
3180   }
3181 
3182   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3183   if (gisstride) {
3184     PetscInt N;
3185     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3186     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3187     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3188     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3189   } else {
3190     PetscInt cbs;
3191     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3192     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3193     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3194   }
3195 
3196   *isseq = iscol_local;
3197   PetscFunctionReturn(0);
3198 }
3199 
3200 /*
3201  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and forming an iscol_local with the global size of iscol
3202  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3203 
3204  Input Parameters:
3205    mat - matrix
3206    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3207            i.e., mat->rstart <= isrow[i] < mat->rend
3208    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3209            i.e., mat->cstart <= iscol[i] < mat->cend
3210  Output Parameters:
3211    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3212    iscol_o - sequential column index set for retrieving mat->B
3213    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3214  */
3215 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3216 {
3217   PetscErrorCode ierr;
3218   Vec            x,cmap;
3219   const PetscInt *is_idx;
3220   PetscScalar    *xarray,*cmaparray;
3221   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3222   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3223   Mat            B=a->B;
3224   Vec            lvec=a->lvec,lcmap;
3225   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3226   MPI_Comm       comm;
3227   VecScatter     Mvctx=a->Mvctx;
3228 
3229   PetscFunctionBegin;
3230   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3231   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3232 
3233   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3234   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3235   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3236   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3237   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3238 
3239   /* Get start indices */
3240   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3241   isstart -= ncols;
3242   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3243 
3244   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3245   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3246   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3247   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3248   for (i=0; i<ncols; i++) {
3249     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3250     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3251     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3252   }
3253   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3254   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3255   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3256 
3257   /* Get iscol_d */
3258   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3259   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3260   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3261 
3262   /* Get isrow_d */
3263   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3264   rstart = mat->rmap->rstart;
3265   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3266   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3267   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3268   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3269 
3270   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3271   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3272   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3273 
3274   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3275   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3276   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3277 
3278   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3279 
3280   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3281   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3282 
3283   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3284   /* off-process column indices */
3285   count = 0;
3286   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3287   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3288 
3289   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3290   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3291   for (i=0; i<Bn; i++) {
3292     if (PetscRealPart(xarray[i]) > -1.0) {
3293       idx[count]     = i;                   /* local column index in off-diagonal part B */
3294       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3295       count++;
3296     }
3297   }
3298   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3299   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3300 
3301   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3302   /* cannot ensure iscol_o has same blocksize as iscol! */
3303 
3304   ierr = PetscFree(idx);CHKERRQ(ierr);
3305   *garray = cmap1;
3306 
3307   ierr = VecDestroy(&x);CHKERRQ(ierr);
3308   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3309   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 
3313 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3314 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3315 {
3316   PetscErrorCode ierr;
3317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3318   Mat            M = NULL;
3319   MPI_Comm       comm;
3320   IS             iscol_d,isrow_d,iscol_o;
3321   Mat            Asub = NULL,Bsub = NULL;
3322   PetscInt       n;
3323 
3324   PetscFunctionBegin;
3325   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3326 
3327   if (call == MAT_REUSE_MATRIX) {
3328     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3329     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3330     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3331 
3332     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3333     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3334 
3335     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3336     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3337 
3338     /* Update diagonal and off-diagonal portions of submat */
3339     asub = (Mat_MPIAIJ*)(*submat)->data;
3340     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3341     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3342     if (n) {
3343       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3344     }
3345     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3346     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3347 
3348   } else { /* call == MAT_INITIAL_MATRIX) */
3349     const PetscInt *garray;
3350     PetscInt        BsubN;
3351 
3352     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3353     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3354 
3355     /* Create local submatrices Asub and Bsub */
3356     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3357     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3358 
3359     /* Create submatrix M */
3360     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3361 
3362     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3363     asub = (Mat_MPIAIJ*)M->data;
3364 
3365     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3366     n = asub->B->cmap->N;
3367     if (BsubN > n) {
3368       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3369       const PetscInt *idx;
3370       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3371       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3372 
3373       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3374       j = 0;
3375       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3376       for (i=0; i<n; i++) {
3377         if (j >= BsubN) break;
3378         while (subgarray[i] > garray[j]) j++;
3379 
3380         if (subgarray[i] == garray[j]) {
3381           idx_new[i] = idx[j++];
3382         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3383       }
3384       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3385 
3386       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3387       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3388 
3389     } else if (BsubN < n) {
3390       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than those of B %D",BsubN,asub->B->cmap->N);
3391     }
3392 
3393     ierr = PetscFree(garray);CHKERRQ(ierr);
3394     *submat = M;
3395 
3396     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3397     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3398     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3399 
3400     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3401     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3402 
3403     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3404     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3405   }
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3410 {
3411   PetscErrorCode ierr;
3412   IS             iscol_local=NULL,isrow_d;
3413   PetscInt       csize;
3414   PetscInt       n,i,j,start,end;
3415   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3416   MPI_Comm       comm;
3417 
3418   PetscFunctionBegin;
3419   /* If isrow has same processor distribution as mat,
3420      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3421   if (call == MAT_REUSE_MATRIX) {
3422     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3423     if (isrow_d) {
3424       sameRowDist  = PETSC_TRUE;
3425       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3426     } else {
3427       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3428       if (iscol_local) {
3429         sameRowDist  = PETSC_TRUE;
3430         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3431       }
3432     }
3433   } else {
3434     /* Check if isrow has same processor distribution as mat */
3435     sameDist[0] = PETSC_FALSE;
3436     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3437     if (!n) {
3438       sameDist[0] = PETSC_TRUE;
3439     } else {
3440       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3441       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3442       if (i >= start && j < end) {
3443         sameDist[0] = PETSC_TRUE;
3444       }
3445     }
3446 
3447     /* Check if iscol has same processor distribution as mat */
3448     sameDist[1] = PETSC_FALSE;
3449     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3450     if (!n) {
3451       sameDist[1] = PETSC_TRUE;
3452     } else {
3453       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3454       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3455       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3456     }
3457 
3458     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3459     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3460     sameRowDist = tsameDist[0];
3461   }
3462 
3463   if (sameRowDist) {
3464     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3465       /* isrow and iscol have same processor distribution as mat */
3466       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3467       PetscFunctionReturn(0);
3468     } else { /* sameRowDist */
3469       /* isrow has same processor distribution as mat */
3470       if (call == MAT_INITIAL_MATRIX) {
3471         PetscBool sorted;
3472         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3473         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3474         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3475         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3476 
3477         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3478         if (sorted) {
3479           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3480           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3481           PetscFunctionReturn(0);
3482         }
3483       } else { /* call == MAT_REUSE_MATRIX */
3484         IS    iscol_sub;
3485         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3486         if (iscol_sub) {
3487           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3488           PetscFunctionReturn(0);
3489         }
3490       }
3491     }
3492   }
3493 
3494   /* General case: iscol -> iscol_local which has global size of iscol */
3495   if (call == MAT_REUSE_MATRIX) {
3496     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3497     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3498   } else {
3499     if (!iscol_local) {
3500       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3501     }
3502   }
3503 
3504   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3505   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3506 
3507   if (call == MAT_INITIAL_MATRIX) {
3508     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3509     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3510   }
3511   PetscFunctionReturn(0);
3512 }
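
/*
   Illustrative usage sketch (index-set construction is an assumption): MatCreateSubMatrix()
   calls MatCreateSubMatrix_MPIAIJ() above for a MATMPIAIJ matrix. Here each process requests
   exactly the rows and columns it owns, so both index sets have the same distribution as mat
   and the SameRowColDist path is taken.

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/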
3513 
3514 /*@C
3515      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3516          and "off-diagonal" parts of the matrix in CSR format.
3517 
3518    Collective
3519 
3520    Input Parameters:
3521 +  comm - MPI communicator
3522 .  A - "diagonal" portion of matrix
3523 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3524 -  garray - global index of B columns
3525 
3526    Output Parameter:
3527 .   mat - the matrix, with input A as its local diagonal matrix
3528    Level: advanced
3529 
3530    Notes:
3531        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3532        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3533 
3534 .seealso: MatCreateMPIAIJWithSplitArrays()
3535 @*/
3536 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3537 {
3538   PetscErrorCode ierr;
3539   Mat_MPIAIJ     *maij;
3540   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3541   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3542   PetscScalar    *oa=b->a;
3543   Mat            Bnew;
3544   PetscInt       m,n,N;
3545 
3546   PetscFunctionBegin;
3547   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3548   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3549   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3550   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3551   /* the check below is removed because when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3552   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3553 
3554   /* Get global columns of mat */
3555   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3556 
3557   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3558   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3559   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3560   maij = (Mat_MPIAIJ*)(*mat)->data;
3561 
3562   (*mat)->preallocated = PETSC_TRUE;
3563 
3564   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3565   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3566 
3567   /* Set A as diagonal portion of *mat */
3568   maij->A = A;
3569 
3570   nz = oi[m];
3571   for (i=0; i<nz; i++) {
3572     col   = oj[i];
3573     oj[i] = garray[col];
3574   }
3575 
3576    /* Set Bnew as off-diagonal portion of *mat */
3577   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3578   bnew        = (Mat_SeqAIJ*)Bnew->data;
3579   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3580   maij->B     = Bnew;
3581 
3582   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3583 
3584   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3585   b->free_a       = PETSC_FALSE;
3586   b->free_ij      = PETSC_FALSE;
3587   ierr = MatDestroy(&B);CHKERRQ(ierr);
3588 
3589   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3590   bnew->free_a       = PETSC_TRUE;
3591   bnew->free_ij      = PETSC_TRUE;
3592 
3593   /* condense columns of maij->B */
3594   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3595   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3597   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3598   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3599   PetscFunctionReturn(0);
3600 }
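
/*
   Illustrative usage sketch (Aseq, Bseq and garray are assumed to have been built already,
   e.g. with MatCreateSubMatrix_SeqAIJ() and ISGetSeqIS_SameColDist_Private() as in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() above): both sequential matrices are taken over
   by the new parallel matrix and must not be used by the caller afterwards.

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aseq,Bseq,garray,&M);CHKERRQ(ierr);
*/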
3601 
3602 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3603 
3604 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3605 {
3606   PetscErrorCode ierr;
3607   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3608   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3609   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3610   Mat            M,Msub,B=a->B;
3611   MatScalar      *aa;
3612   Mat_SeqAIJ     *aij;
3613   PetscInt       *garray = a->garray,*colsub,Ncols;
3614   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3615   IS             iscol_sub,iscmap;
3616   const PetscInt *is_idx,*cmap;
3617   PetscBool      allcolumns=PETSC_FALSE;
3618   MPI_Comm       comm;
3619 
3620   PetscFunctionBegin;
3621   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3622 
3623   if (call == MAT_REUSE_MATRIX) {
3624     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3625     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3626     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3627 
3628     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3629     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3630 
3631     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3632     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3633 
3634     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3635 
3636   } else { /* call == MAT_INITIAL_MATRIX) */
3637     PetscBool flg;
3638 
3639     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3640     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3641 
3642     /* (1) iscol -> nonscalable iscol_local */
3643     /* Check for special case: each processor gets entire matrix columns */
3644     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3645     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3646     if (allcolumns) {
3647       iscol_sub = iscol_local;
3648       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3649       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3650 
3651     } else {
3652       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3653       PetscInt *idx,*cmap1,k;
3654       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3655       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3656       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3657       count = 0;
3658       k     = 0;
3659       for (i=0; i<Ncols; i++) {
3660         j = is_idx[i];
3661         if (j >= cstart && j < cend) {
3662           /* diagonal part of mat */
3663           idx[count]     = j;
3664           cmap1[count++] = i; /* column index in submat */
3665         } else if (Bn) {
3666           /* off-diagonal part of mat */
3667           if (j == garray[k]) {
3668             idx[count]     = j;
3669             cmap1[count++] = i;  /* column index in submat */
3670           } else if (j > garray[k]) {
3671             while (j > garray[k] && k < Bn-1) k++;
3672             if (j == garray[k]) {
3673               idx[count]     = j;
3674               cmap1[count++] = i; /* column index in submat */
3675             }
3676           }
3677         }
3678       }
3679       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3680 
3681       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3682       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3683       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3684 
3685       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3686     }
3687 
3688     /* (3) Create sequential Msub */
3689     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3690   }
3691 
3692   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3693   aij  = (Mat_SeqAIJ*)(Msub)->data;
3694   ii   = aij->i;
3695   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3696 
3697   /*
3698       m - number of local rows
3699       Ncols - number of columns (same on all processors)
3700       rstart - first row in new global matrix generated
3701   */
3702   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3703 
3704   if (call == MAT_INITIAL_MATRIX) {
3705     /* (4) Create parallel newmat */
3706     PetscMPIInt    rank,size;
3707     PetscInt       csize;
3708 
3709     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3710     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3711 
3712     /*
3713         Determine the number of non-zeros in the diagonal and off-diagonal
3714         portions of the matrix in order to do correct preallocation
3715     */
3716 
3717     /* first get start and end of "diagonal" columns */
3718     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3719     if (csize == PETSC_DECIDE) {
3720       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3721       if (mglobal == Ncols) { /* square matrix */
3722         nlocal = m;
3723       } else {
3724         nlocal = Ncols/size + ((Ncols % size) > rank);
3725       }
3726     } else {
3727       nlocal = csize;
3728     }
3729     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3730     rstart = rend - nlocal;
3731     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3732 
3733     /* next, compute all the lengths */
3734     jj    = aij->j;
3735     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3736     olens = dlens + m;
3737     for (i=0; i<m; i++) {
3738       jend = ii[i+1] - ii[i];
3739       olen = 0;
3740       dlen = 0;
3741       for (j=0; j<jend; j++) {
3742         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3743         else dlen++;
3744         jj++;
3745       }
3746       olens[i] = olen;
3747       dlens[i] = dlen;
3748     }
3749 
3750     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3751     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3752 
3753     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3754     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3755     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3756     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3757     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3758     ierr = PetscFree(dlens);CHKERRQ(ierr);
3759 
3760   } else { /* call == MAT_REUSE_MATRIX */
3761     M    = *newmat;
3762     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3763     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3764     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3765     /*
3766          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3767        rather than the slower MatSetValues().
3768     */
3769     M->was_assembled = PETSC_TRUE;
3770     M->assembled     = PETSC_FALSE;
3771   }
3772 
3773   /* (5) Set values of Msub to *newmat */
3774   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3775   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3776 
3777   jj   = aij->j;
3778   aa   = aij->a;
3779   for (i=0; i<m; i++) {
3780     row = rstart + i;
3781     nz  = ii[i+1] - ii[i];
3782     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3783     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3784     jj += nz; aa += nz;
3785   }
3786   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3787 
3788   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3790 
3791   ierr = PetscFree(colsub);CHKERRQ(ierr);
3792 
3793   /* save Msub, iscol_sub and iscmap used in processor for next request */
3794   if (call ==  MAT_INITIAL_MATRIX) {
3795     *newmat = M;
3796     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3797     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3798 
3799     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3800     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3801 
3802     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3803     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3804 
3805     if (iscol_local) {
3806       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3807       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3808     }
3809   }
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 /*
3814     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
3815   is built locally, and then the end result is formed by concatenating the local matrices.
3816   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3817 
3818   Note: This requires a sequential iscol with all indices.
3819 */
3820 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3821 {
3822   PetscErrorCode ierr;
3823   PetscMPIInt    rank,size;
3824   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3825   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3826   Mat            M,Mreuse;
3827   MatScalar      *aa,*vwork;
3828   MPI_Comm       comm;
3829   Mat_SeqAIJ     *aij;
3830   PetscBool      colflag,allcolumns=PETSC_FALSE;
3831 
3832   PetscFunctionBegin;
3833   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3834   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3835   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3836 
3837   /* Check for special case: each processor gets entire matrix columns */
3838   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3839   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3840   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3841 
3842   if (call ==  MAT_REUSE_MATRIX) {
3843     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3844     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3845     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3846   } else {
3847     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3848   }
3849 
3850   /*
3851       m - number of local rows
3852       n - number of columns (same on all processors)
3853       rstart - first row in new global matrix generated
3854   */
3855   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3856   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3857   if (call == MAT_INITIAL_MATRIX) {
3858     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3859     ii  = aij->i;
3860     jj  = aij->j;
3861 
3862     /*
3863         Determine the number of non-zeros in the diagonal and off-diagonal
3864         portions of the matrix in order to do correct preallocation
3865     */
3866 
3867     /* first get start and end of "diagonal" columns */
3868     if (csize == PETSC_DECIDE) {
3869       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3870       if (mglobal == n) { /* square matrix */
3871         nlocal = m;
3872       } else {
3873         nlocal = n/size + ((n % size) > rank);
3874       }
3875     } else {
3876       nlocal = csize;
3877     }
3878     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3879     rstart = rend - nlocal;
3880     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3881 
3882     /* next, compute all the lengths */
3883     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3884     olens = dlens + m;
3885     for (i=0; i<m; i++) {
3886       jend = ii[i+1] - ii[i];
3887       olen = 0;
3888       dlen = 0;
3889       for (j=0; j<jend; j++) {
3890         if (*jj < rstart || *jj >= rend) olen++;
3891         else dlen++;
3892         jj++;
3893       }
3894       olens[i] = olen;
3895       dlens[i] = dlen;
3896     }
3897     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3898     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3899     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3900     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3901     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3902     ierr = PetscFree(dlens);CHKERRQ(ierr);
3903   } else {
3904     PetscInt ml,nl;
3905 
3906     M    = *newmat;
3907     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3908     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3909     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3910     /*
3911          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3912        rather than the slower MatSetValues().
3913     */
3914     M->was_assembled = PETSC_TRUE;
3915     M->assembled     = PETSC_FALSE;
3916   }
3917   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3918   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3919   ii   = aij->i;
3920   jj   = aij->j;
3921   aa   = aij->a;
3922   for (i=0; i<m; i++) {
3923     row   = rstart + i;
3924     nz    = ii[i+1] - ii[i];
3925     cwork = jj;     jj += nz;
3926     vwork = aa;     aa += nz;
3927     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3928   }
3929 
3930   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   *newmat = M;
3933 
3934   /* save submatrix used in processor for next request */
3935   if (call ==  MAT_INITIAL_MATRIX) {
3936     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3937     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3938   }
3939   PetscFunctionReturn(0);
3940 }
3941 
3942 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3943 {
3944   PetscInt       m,cstart, cend,j,nnz,i,d;
3945   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3946   const PetscInt *JJ;
3947   PetscErrorCode ierr;
3948   PetscBool      nooffprocentries;
3949 
3950   PetscFunctionBegin;
3951   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3952 
3953   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3954   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3955   m      = B->rmap->n;
3956   cstart = B->cmap->rstart;
3957   cend   = B->cmap->rend;
3958   rstart = B->rmap->rstart;
3959 
3960   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3961 
3962 #if defined(PETSC_USE_DEBUG)
3963   for (i=0; i<m; i++) {
3964     nnz = Ii[i+1]- Ii[i];
3965     JJ  = J + Ii[i];
3966     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3967     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3968     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3969   }
3970 #endif
3971 
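  /* Count, for each local row, how many column indices fall inside the diagonal block
     [cstart,cend) versus outside it; these counts drive the preallocation below */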
3972   for (i=0; i<m; i++) {
3973     nnz     = Ii[i+1]- Ii[i];
3974     JJ      = J + Ii[i];
3975     nnz_max = PetscMax(nnz_max,nnz);
3976     d       = 0;
3977     for (j=0; j<nnz; j++) {
3978       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3979     }
3980     d_nnz[i] = d;
3981     o_nnz[i] = nnz - d;
3982   }
3983   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3984   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3985 
3986   for (i=0; i<m; i++) {
3987     ii   = i + rstart;
3988     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3989   }
3990   nooffprocentries    = B->nooffprocentries;
3991   B->nooffprocentries = PETSC_TRUE;
3992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3994   B->nooffprocentries = nooffprocentries;
3995 
3996   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3997   PetscFunctionReturn(0);
3998 }
3999 
4000 /*@
4001    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4002    (the default parallel PETSc format).
4003 
4004    Collective
4005 
4006    Input Parameters:
4007 +  B - the matrix
4008 .  i - the indices into j for the start of each local row (starts with zero)
4009 .  j - the column indices for each local row (starts with zero)
4010 -  v - optional values in the matrix
4011 
4012    Level: developer
4013 
4014    Notes:
4015        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4016      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4017      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4018 
4019        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
4020 
4021        The format used for the sparse matrix input is equivalent to a
4022     row-major ordering, i.e. for the following matrix, the input data expected is
4023     as shown below:
4024 
4025 $        1 0 0
4026 $        2 0 3     P0
4027 $       -------
4028 $        4 5 6     P1
4029 $
4030 $     Process0 [P0]: rows_owned=[0,1]
4031 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4032 $        j =  {0,0,2}  [size = 3]
4033 $        v =  {1,2,3}  [size = 3]
4034 $
4035 $     Process1 [P1]: rows_owned=[2]
4036 $        i =  {0,3}    [size = nrow+1  = 1+1]
4037 $        j =  {0,1,2}  [size = 3]
4038 $        v =  {4,5,6}  [size = 3]
4039 
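       As an illustration only, a minimal sketch of the call process 0 [P0] above would make
     (each process supplies its own rows; the communicator comm is assumed and error checking
     is omitted):

$     PetscInt    i[] = {0,1,3};
$     PetscInt    j[] = {0,0,2};
$     PetscScalar v[] = {1,2,3};
$     Mat         B;
$
$     MatCreate(comm,&B);
$     MatSetSizes(B,2,PETSC_DECIDE,3,3);
$     MatSetType(B,MATMPIAIJ);
$     MatMPIAIJSetPreallocationCSR(B,i,j,v);
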
4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4041           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4042 @*/
4043 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4044 {
4045   PetscErrorCode ierr;
4046 
4047   PetscFunctionBegin;
4048   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4049   PetscFunctionReturn(0);
4050 }
4051 
4052 /*@C
4053    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4054    (the default parallel PETSc format).  For good matrix assembly performance
4055    the user should preallocate the matrix storage by setting the parameters
4056    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4057    performance can be increased by more than a factor of 50.
4058 
4059    Collective
4060 
4061    Input Parameters:
4062 +  B - the matrix
4063 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4064            (same value is used for all local rows)
4065 .  d_nnz - array containing the number of nonzeros in the various rows of the
4066            DIAGONAL portion of the local submatrix (possibly different for each row)
4067            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4068            The size of this array is equal to the number of local rows, i.e 'm'.
4069            For matrices that will be factored, you must leave room for (and set)
4070            the diagonal entry even if it is zero.
4071 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4072            submatrix (same value is used for all local rows).
4073 -  o_nnz - array containing the number of nonzeros in the various rows of the
4074            OFF-DIAGONAL portion of the local submatrix (possibly different for
4075            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4076            structure. The size of this array is equal to the number
4077            of local rows, i.e 'm'.
4078 
4079    If the *_nnz parameter is given then the *_nz parameter is ignored
4080 
4081    The AIJ format (also called the Yale sparse matrix format or
4082    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4083    storage.  The stored row and column indices begin with zero.
4084    See Users-Manual: ch_mat for details.
4085 
4086    The parallel matrix is partitioned such that the first m0 rows belong to
4087    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4088    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
4089 
4090    The DIAGONAL portion of the local submatrix of a processor can be defined
4091    as the submatrix which is obtained by extracting the part corresponding to
4092    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4093    first row that belongs to the processor, r2 is the last row belonging to
4094    this processor, and c1-c2 is the range of indices of the local part of a
4095    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4096    common case of a square matrix, the row and column ranges are the same and
4097    the DIAGONAL part is also square. The remaining portion of the local
4098    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4099 
4100    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4101 
4102    You can call MatGetInfo() to get information on how effective the preallocation was;
4103    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4104    You can also run with the option -info and look for messages with the string
4105    malloc in them to see if additional memory allocation was needed.
4106 
4107    Example usage:
4108 
4109    Consider the following 8x8 matrix with 34 non-zero values, that is
4110    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4111    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4112    as follows:
4113 
4114 .vb
4115             1  2  0  |  0  3  0  |  0  4
4116     Proc0   0  5  6  |  7  0  0  |  8  0
4117             9  0 10  | 11  0  0  | 12  0
4118     -------------------------------------
4119            13  0 14  | 15 16 17  |  0  0
4120     Proc1   0 18  0  | 19 20 21  |  0  0
4121             0  0  0  | 22 23  0  | 24  0
4122     -------------------------------------
4123     Proc2  25 26 27  |  0  0 28  | 29  0
4124            30  0  0  | 31 32 33  |  0 34
4125 .ve
4126 
4127    This can be represented as a collection of submatrices as:
4128 
4129 .vb
4130       A B C
4131       D E F
4132       G H I
4133 .ve
4134 
4135    Where the submatrices A,B,C are owned by proc0, D,E,F are
4136    owned by proc1, G,H,I are owned by proc2.
4137 
4138    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4139    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4140    The 'M','N' parameters are 8,8, and have the same values on all procs.
4141 
4142    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4143    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4144    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4145    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4146    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4147    matrix, and [DF] as another SeqAIJ matrix.
4148 
4149    When d_nz, o_nz parameters are specified, d_nz storage elements are
4150    allocated for every row of the local diagonal submatrix, and o_nz
4151    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4152    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4153    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4154    In this case, the values of d_nz,o_nz are:
4155 .vb
4156      proc0 : d_nz = 2, o_nz = 2
4157      proc1 : d_nz = 3, o_nz = 2
4158      proc2 : d_nz = 1, o_nz = 4
4159 .ve
4160    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4161    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4162    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4163    34 values.
4164 
4165    When d_nnz, o_nnz parameters are specified, the storage is specified
4166    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4167    In the above case the values for d_nnz,o_nnz are:
4168 .vb
4169      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4170      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4171      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4172 .ve
4173    Here the space allocated is the sum of all the above values, i.e. 34, and
4174    hence the preallocation is perfect.
4175 
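   As a sketch only, proc1 from the example above could preallocate its rows with the
   per-row counts listed (the communicator comm and the matrix A are assumed; error
   checking omitted; the other processes make the analogous collective call with their
   own sizes and arrays):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;

     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
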
4176    Level: intermediate
4177 
4178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4179           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4180 @*/
4181 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4182 {
4183   PetscErrorCode ierr;
4184 
4185   PetscFunctionBegin;
4186   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4187   PetscValidType(B,1);
4188   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 /*@
4193      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4194          in standard CSR format.
4195 
4196    Collective
4197 
4198    Input Parameters:
4199 +  comm - MPI communicator
4200 .  m - number of local rows (Cannot be PETSC_DECIDE)
4201 .  n - This value should be the same as the local size used in creating the
4202        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4203        calculated if N is given). For square matrices n is almost always m.
4204 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4205 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4206 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4207 .   j - column indices
4208 -   a - matrix values
4209 
4210    Output Parameter:
4211 .   mat - the matrix
4212 
4213    Level: intermediate
4214 
4215    Notes:
4216        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4217      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4218      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4219 
4220        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4221 
4222        The format used for the sparse matrix input is equivalent to a
4223     row-major ordering, i.e. for the following matrix, the input data expected is
4224     as shown below.
4225 
4226        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4227 
4228 $        1 0 0
4229 $        2 0 3     P0
4230 $       -------
4231 $        4 5 6     P1
4232 $
4233 $     Process0 [P0]: rows_owned=[0,1]
4234 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4235 $        j =  {0,0,2}  [size = 3]
4236 $        v =  {1,2,3}  [size = 3]
4237 $
4238 $     Process1 [P1]: rows_owned=[2]
4239 $        i =  {0,3}    [size = nrow+1  = 1+1]
4240 $        j =  {0,1,2}  [size = 3]
4241 $        v =  {4,5,6}  [size = 3]
4242 
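       For illustration only, a sketch of the call process 0 [P0] above would make (the local
     column size is left as PETSC_DECIDE, the communicator comm is assumed, and error checking
     is omitted; process 1 passes its own arrays in the same collective call):

$     const PetscInt    i[] = {0,1,3};
$     const PetscInt    j[] = {0,0,2};
$     const PetscScalar a[] = {1,2,3};
$     Mat               A;
$
$     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);
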
4243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4244           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4245 @*/
4246 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4247 {
4248   PetscErrorCode ierr;
4249 
4250   PetscFunctionBegin;
4251   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4252   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4253   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4254   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4255   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4256   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4257   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4258   PetscFunctionReturn(0);
4259 }
4260 
4261 /*@
4262      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4263          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4264 
4265    Collective
4266 
4267    Input Parameters:
4268 +  mat - the matrix
4269 .  m - number of local rows (Cannot be PETSC_DECIDE)
4270 .  n - This value should be the same as the local size used in creating the
4271        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4272        calculated if N is given). For square matrices n is almost always m.
4273 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4274 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4275 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4276 .  J - column indices
4277 -  v - matrix values
4278 
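   Notes:
     A sketch of updating only the numerical values of a matrix previously created with
     MatCreateMPIAIJWithArrays(); here Ii, J, vnew, M, and N stand for the caller's unchanged
     index arrays, new values, and global sizes (error checking omitted):

.vb
     PetscInt mloc,nloc;

     MatGetLocalSize(A,&mloc,&nloc);
     MatUpdateMPIAIJWithArrays(A,mloc,nloc,M,N,Ii,J,vnew);
.ve
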
4279    Level: intermediate
4280 
4281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4282           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4283 @*/
4284 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4285 {
4286   PetscErrorCode ierr;
4287   PetscInt       cstart,nnz,i,j;
4288   PetscInt       *ld;
4289   PetscBool      nooffprocentries;
4290   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4291   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4292   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4293   const PetscInt *Adi = Ad->i;
4294   PetscInt       ldi,Iii,md;
4295 
4296   PetscFunctionBegin;
4297   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4298   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4299   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4300   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4301 
4302   cstart = mat->cmap->rstart;
4303   if (!Aij->ld) {
4304     /* count number of entries below block diagonal */
4305     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4306     Aij->ld = ld;
4307     for (i=0; i<m; i++) {
4308       nnz  = Ii[i+1]- Ii[i];
4309       j     = 0;
4310       while (j < nnz && J[j] < cstart) {j++;}  /* test j < nnz before reading J[j] */
4311       J    += nnz;
4312       ld[i] = j;
4313     }
4314   } else {
4315     ld = Aij->ld;
4316   }
4317 
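  /* For each local row, v[Ii[i]..Ii[i+1]) holds ld[i] entries whose columns lie to the left of
     the diagonal block, then the diagonal-block entries, then the remaining off-diagonal entries;
     copy them into the off-diagonal (ao) and diagonal (ad) value arrays accordingly */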
4318   for (i=0; i<m; i++) {
4319     nnz  = Ii[i+1]- Ii[i];
4320     Iii  = Ii[i];
4321     ldi  = ld[i];
4322     md   = Adi[i+1]-Adi[i];
4323     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4324     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4325     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4326     ad  += md;
4327     ao  += nnz - md;
4328   }
4329   nooffprocentries      = mat->nooffprocentries;
4330   mat->nooffprocentries = PETSC_TRUE;
4331   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4332   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4333   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4334   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4335   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4336   mat->nooffprocentries = nooffprocentries;
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 /*@C
4341    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4342    (the default parallel PETSc format).  For good matrix assembly performance
4343    the user should preallocate the matrix storage by setting the parameters
4344    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4345    performance can be increased by more than a factor of 50.
4346 
4347    Collective
4348 
4349    Input Parameters:
4350 +  comm - MPI communicator
4351 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4352            This value should be the same as the local size used in creating the
4353            y vector for the matrix-vector product y = Ax.
4354 .  n - This value should be the same as the local size used in creating the
4355        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4356        calculated if N is given). For square matrices n is almost always m.
4357 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4358 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4359 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4360            (same value is used for all local rows)
4361 .  d_nnz - array containing the number of nonzeros in the various rows of the
4362            DIAGONAL portion of the local submatrix (possibly different for each row)
4363            or NULL, if d_nz is used to specify the nonzero structure.
4364            The size of this array is equal to the number of local rows, i.e 'm'.
4365 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4366            submatrix (same value is used for all local rows).
4367 -  o_nnz - array containing the number of nonzeros in the various rows of the
4368            OFF-DIAGONAL portion of the local submatrix (possibly different for
4369            each row) or NULL, if o_nz is used to specify the nonzero
4370            structure. The size of this array is equal to the number
4371            of local rows, i.e 'm'.
4372 
4373    Output Parameter:
4374 .  A - the matrix
4375 
4376    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4377    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4378    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4379 
4380    Notes:
4381    If the *_nnz parameter is given then the *_nz parameter is ignored
4382 
4383    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4384    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4385    storage requirements for this matrix.
4386 
4387    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4388    processor then it must be used on all processors that share the object for
4389    that argument.
4390 
4391    The user MUST specify either the local or global matrix dimensions
4392    (possibly both).
4393 
4394    The parallel matrix is partitioned across processors such that the
4395    first m0 rows belong to process 0, the next m1 rows belong to
4396    process 1, the next m2 rows belong to process 2, etc., where
4397    m0,m1,m2,... are given by the input parameter 'm', i.e. each processor stores
4398    values corresponding to an [m x N] submatrix.
4399 
4400    The columns are logically partitioned with the n0 columns belonging
4401    to 0th partition, the next n1 columns belonging to the next
4402    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4403 
4404    The DIAGONAL portion of the local submatrix on any given processor
4405    is the submatrix corresponding to the rows and columns m,n
4406    corresponding to the given processor, i.e. the diagonal matrix on
4407    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4408    etc. The remaining portion of the local submatrix [m x (N-n)]
4409    constitutes the OFF-DIAGONAL portion. The example below better
4410    illustrates this concept.
4411 
4412    For a square global matrix we define each processor's diagonal portion
4413    to be its local rows and the corresponding columns (a square submatrix);
4414    each processor's off-diagonal portion encompasses the remainder of the
4415    local matrix (a rectangular submatrix).
4416 
4417    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4418 
4419    When calling this routine with a single process communicator, a matrix of
4420    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4421    type of communicator, use the construction mechanism
4422 .vb
4423      MatCreate(...,&A);
4424      MatSetType(A,MATMPIAIJ);
4425      MatSetSizes(A, m,n,M,N);
4426      MatMPIAIJSetPreallocation(A,...);
4427 .ve
4430 
4431    By default, this format uses inodes (identical nodes) when possible.
4432    We search for consecutive rows with the same nonzero structure, thereby
4433    reusing matrix information to achieve increased efficiency.
4434 
4435    Options Database Keys:
4436 +  -mat_no_inode  - Do not use inodes
4437 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4438 
4439 
4440 
4441    Example usage:
4442 
4443    Consider the following 8x8 matrix with 34 non-zero values, that is
4444    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4445    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4446    as follows
4447 
4448 .vb
4449             1  2  0  |  0  3  0  |  0  4
4450     Proc0   0  5  6  |  7  0  0  |  8  0
4451             9  0 10  | 11  0  0  | 12  0
4452     -------------------------------------
4453            13  0 14  | 15 16 17  |  0  0
4454     Proc1   0 18  0  | 19 20 21  |  0  0
4455             0  0  0  | 22 23  0  | 24  0
4456     -------------------------------------
4457     Proc2  25 26 27  |  0  0 28  | 29  0
4458            30  0  0  | 31 32 33  |  0 34
4459 .ve
4460 
4461    This can be represented as a collection of submatrices as
4462 
4463 .vb
4464       A B C
4465       D E F
4466       G H I
4467 .ve
4468 
4469    Where the submatrices A,B,C are owned by proc0, D,E,F are
4470    owned by proc1, G,H,I are owned by proc2.
4471 
4472    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4473    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4474    The 'M','N' parameters are 8,8, and have the same values on all procs.
4475 
4476    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4477    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4478    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4479    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4480    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4481    matrix, and [DF] as another SeqAIJ matrix.
4482 
4483    When d_nz, o_nz parameters are specified, d_nz storage elements are
4484    allocated for every row of the local diagonal submatrix, and o_nz
4485    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4486    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4487    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4488    In this case, the values of d_nz,o_nz are
4489 .vb
4490      proc0 : d_nz = 2, o_nz = 2
4491      proc1 : d_nz = 3, o_nz = 2
4492      proc2 : d_nz = 1, o_nz = 4
4493 .ve
4494    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4495    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4496    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4497    34 values.
4498 
4499    When d_nnz, o_nnz parameters are specified, the storage is specified
4500    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4501    In the above case the values for d_nnz,o_nnz are
4502 .vb
4503      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4504      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4505      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4506 .ve
4507    Here the space allocated is the sum of all the above values, i.e. 34, and
4508    hence the preallocation is perfect.
4509 
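   As a sketch only, the example above could be created on a 3-process communicator comm
   with the d_nz/o_nz values listed, each process passing its own local sizes in the same
   collective call (error checking omitted):

.vb
     PetscMPIInt rank;
     Mat         A;

     MPI_Comm_rank(comm,&rank);
     if (rank == 0)      MatCreateAIJ(comm,3,3,8,8,2,NULL,2,NULL,&A);
     else if (rank == 1) MatCreateAIJ(comm,3,3,8,8,3,NULL,2,NULL,&A);
     else                MatCreateAIJ(comm,2,2,8,8,1,NULL,4,NULL,&A);
.ve
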
4510    Level: intermediate
4511 
4512 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4513           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4514 @*/
4515 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4516 {
4517   PetscErrorCode ierr;
4518   PetscMPIInt    size;
4519 
4520   PetscFunctionBegin;
4521   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4522   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4523   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4524   if (size > 1) {
4525     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4526     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4527   } else {
4528     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4529     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4530   }
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4535 {
4536   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4537   PetscBool      flg;
4538   PetscErrorCode ierr;
4539 
4540   PetscFunctionBegin;
4541   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4542   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4543   if (Ad)     *Ad     = a->A;
4544   if (Ao)     *Ao     = a->B;
4545   if (colmap) *colmap = a->garray;
4546   PetscFunctionReturn(0);
4547 }
4548 
4549 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4550 {
4551   PetscErrorCode ierr;
4552   PetscInt       m,N,i,rstart,nnz,Ii;
4553   PetscInt       *indx;
4554   PetscScalar    *values;
4555 
4556   PetscFunctionBegin;
4557   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4558   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4559     PetscInt       *dnz,*onz,sum,bs,cbs;
4560 
4561     if (n == PETSC_DECIDE) {
4562       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4563     }
4564     /* Check sum(n) = N */
4565     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4566     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4567 
4568     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4569     rstart -= m;
4570 
4571     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4572     for (i=0; i<m; i++) {
4573       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4574       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4575       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4576     }
4577 
4578     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4579     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4580     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4581     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4582     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4583     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4584     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4585     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4586   }
4587 
4588   /* numeric phase */
4589   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4590   for (i=0; i<m; i++) {
4591     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4592     Ii   = i + rstart;
4593     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4594     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4595   }
4596   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
4601 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4602 {
4603   PetscErrorCode    ierr;
4604   PetscMPIInt       rank;
4605   PetscInt          m,N,i,rstart,nnz;
4606   size_t            len;
4607   const PetscInt    *indx;
4608   PetscViewer       out;
4609   char              *name;
4610   Mat               B;
4611   const PetscScalar *values;
4612 
4613   PetscFunctionBegin;
4614   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4615   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4616   /* Should this be the type of the diagonal block of A? */
4617   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4618   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4619   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4620   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4621   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4622   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4623   for (i=0; i<m; i++) {
4624     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4625     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4626     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4627   }
4628   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4629   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4630 
4631   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4632   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4633   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4634   sprintf(name,"%s.%d",outfile,rank);
4635   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4636   ierr = PetscFree(name);CHKERRQ(ierr);
4637   ierr = MatView(B,out);CHKERRQ(ierr);
4638   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4639   ierr = MatDestroy(&B);CHKERRQ(ierr);
4640   PetscFunctionReturn(0);
4641 }
4642 
4643 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4644 {
4645   PetscErrorCode      ierr;
4646   Mat_Merge_SeqsToMPI *merge;
4647   PetscContainer      container;
4648 
4649   PetscFunctionBegin;
4650   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4651   if (container) {
4652     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4653     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4654     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4655     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4656     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4657     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4658     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4659     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4660     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4661     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4662     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4663     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4664     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4665     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4666     ierr = PetscFree(merge);CHKERRQ(ierr);
4667     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4668   }
4669   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4670   PetscFunctionReturn(0);
4671 }
4672 
4673 #include <../src/mat/utils/freespace.h>
4674 #include <petscbt.h>
4675 
4676 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4677 {
4678   PetscErrorCode      ierr;
4679   MPI_Comm            comm;
4680   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4681   PetscMPIInt         size,rank,taga,*len_s;
4682   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4683   PetscInt            proc,m;
4684   PetscInt            **buf_ri,**buf_rj;
4685   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4686   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4687   MPI_Request         *s_waits,*r_waits;
4688   MPI_Status          *status;
4689   MatScalar           *aa=a->a;
4690   MatScalar           **abuf_r,*ba_i;
4691   Mat_Merge_SeqsToMPI *merge;
4692   PetscContainer      container;
4693 
4694   PetscFunctionBegin;
4695   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4696   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4697 
4698   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4699   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4700 
4701   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4702   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4703 
4704   bi     = merge->bi;
4705   bj     = merge->bj;
4706   buf_ri = merge->buf_ri;
4707   buf_rj = merge->buf_rj;
4708 
4709   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4710   owners = merge->rowmap->range;
4711   len_s  = merge->len_s;
4712 
4713   /* send and recv matrix values */
4714   /*-----------------------------*/
4715   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4716   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4717 
4718   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4719   for (proc=0,k=0; proc<size; proc++) {
4720     if (!len_s[proc]) continue;
4721     i    = owners[proc];
4722     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4723     k++;
4724   }
4725 
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4728   ierr = PetscFree(status);CHKERRQ(ierr);
4729 
4730   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4731   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4732 
4733   /* insert mat values of mpimat */
4734   /*----------------------------*/
4735   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4736   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4737 
4738   for (k=0; k<merge->nrecv; k++) {
4739     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4740     nrows       = *(buf_ri_k[k]);
4741     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4742     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4743   }
4744 
4745   /* set values of ba */
4746   m = merge->rowmap->n;
4747   for (i=0; i<m; i++) {
4748     arow = owners[rank] + i;
4749     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4750     bnzi = bi[i+1] - bi[i];
4751     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4752 
4753     /* add local non-zero vals of this proc's seqmat into ba */
4754     anzi   = ai[arow+1] - ai[arow];
4755     aj     = a->j + ai[arow];
4756     aa     = a->a + ai[arow];
4757     nextaj = 0;
4758     for (j=0; nextaj<anzi; j++) {
4759       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4760         ba_i[j] += aa[nextaj++];
4761       }
4762     }
4763 
4764     /* add received vals into ba */
4765     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4766       /* i-th row */
4767       if (i == *nextrow[k]) {
4768         anzi   = *(nextai[k]+1) - *nextai[k];
4769         aj     = buf_rj[k] + *(nextai[k]);
4770         aa     = abuf_r[k] + *(nextai[k]);
4771         nextaj = 0;
4772         for (j=0; nextaj<anzi; j++) {
4773           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4774             ba_i[j] += aa[nextaj++];
4775           }
4776         }
4777         nextrow[k]++; nextai[k]++;
4778       }
4779     }
4780     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4781   }
4782   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4783   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4784 
4785   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4786   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4787   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4788   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4789   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4790   PetscFunctionReturn(0);
4791 }
4792 
4793 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4794 {
4795   PetscErrorCode      ierr;
4796   Mat                 B_mpi;
4797   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4798   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4799   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4800   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4801   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4802   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4803   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4804   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4805   MPI_Status          *status;
4806   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4807   PetscBT             lnkbt;
4808   Mat_Merge_SeqsToMPI *merge;
4809   PetscContainer      container;
4810 
4811   PetscFunctionBegin;
4812   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4813 
4814   /* make sure it is a PETSc comm */
4815   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4817   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4818 
4819   ierr = PetscNew(&merge);CHKERRQ(ierr);
4820   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4821 
4822   /* determine row ownership */
4823   /*---------------------------------------------------------*/
4824   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4825   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4826   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4827   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4828   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4829   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4830   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4831 
4832   m      = merge->rowmap->n;
4833   owners = merge->rowmap->range;
4834 
4835   /* determine the number of messages to send, their lengths */
4836   /*---------------------------------------------------------*/
4837   len_s = merge->len_s;
4838 
4839   len          = 0; /* length of buf_si[] */
4840   merge->nsend = 0;
4841   for (proc=0; proc<size; proc++) {
4842     len_si[proc] = 0;
4843     if (proc == rank) {
4844       len_s[proc] = 0;
4845     } else {
4846       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4847       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4848     }
4849     if (len_s[proc]) {
4850       merge->nsend++;
4851       nrows = 0;
4852       for (i=owners[proc]; i<owners[proc+1]; i++) {
4853         if (ai[i+1] > ai[i]) nrows++;
4854       }
4855       len_si[proc] = 2*(nrows+1);
4856       len         += len_si[proc];
4857     }
4858   }
4859 
4860   /* determine the number and length of messages to receive for ij-structure */
4861   /*-------------------------------------------------------------------------*/
4862   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4863   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4864 
4865   /* post the Irecv of j-structure */
4866   /*-------------------------------*/
4867   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4868   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4869 
4870   /* post the Isend of j-structure */
4871   /*--------------------------------*/
4872   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4873 
4874   for (proc=0, k=0; proc<size; proc++) {
4875     if (!len_s[proc]) continue;
4876     i    = owners[proc];
4877     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4878     k++;
4879   }
4880 
4881   /* receives and sends of j-structure are complete */
4882   /*------------------------------------------------*/
4883   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4884   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4885 
4886   /* send and recv i-structure */
4887   /*---------------------------*/
4888   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4889   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4890 
4891   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4892   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4893   for (proc=0,k=0; proc<size; proc++) {
4894     if (!len_s[proc]) continue;
4895     /* form outgoing message for i-structure:
4896          buf_si[0]:                 nrows to be sent
4897                [1:nrows]:           row index (global)
4898                [nrows+1:2*nrows+1]: i-structure index
4899     */
4900     /*-------------------------------------------*/
4901     nrows       = len_si[proc]/2 - 1;
4902     buf_si_i    = buf_si + nrows+1;
4903     buf_si[0]   = nrows;
4904     buf_si_i[0] = 0;
4905     nrows       = 0;
4906     for (i=owners[proc]; i<owners[proc+1]; i++) {
4907       anzi = ai[i+1] - ai[i];
4908       if (anzi) {
4909         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4910         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4911         nrows++;
4912       }
4913     }
4914     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4915     k++;
4916     buf_si += len_si[proc];
4917   }
4918 
4919   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4920   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4921 
4922   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4923   for (i=0; i<merge->nrecv; i++) {
4924     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4925   }
4926 
4927   ierr = PetscFree(len_si);CHKERRQ(ierr);
4928   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4929   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4930   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4931   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4932   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4933   ierr = PetscFree(status);CHKERRQ(ierr);
4934 
4935   /* compute a local seq matrix in each processor */
4936   /*----------------------------------------------*/
4937   /* allocate bi array and free space for accumulating nonzero column info */
4938   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4939   bi[0] = 0;
4940 
4941   /* create and initialize a linked list */
4942   nlnk = N+1;
4943   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4944 
4945   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4946   len  = ai[owners[rank+1]] - ai[owners[rank]];
4947   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4948 
4949   current_space = free_space;
4950 
4951   /* determine symbolic info for each local row */
4952   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4953 
4954   for (k=0; k<merge->nrecv; k++) {
4955     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4956     nrows       = *buf_ri_k[k];
4957     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4958     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4959   }
4960 
4961   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4962   len  = 0;
4963   for (i=0; i<m; i++) {
4964     bnzi = 0;
4965     /* add local non-zero cols of this proc's seqmat into lnk */
4966     arow  = owners[rank] + i;
4967     anzi  = ai[arow+1] - ai[arow];
4968     aj    = a->j + ai[arow];
4969     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4970     bnzi += nlnk;
4971     /* add received col data into lnk */
4972     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4973       if (i == *nextrow[k]) { /* i-th row */
4974         anzi  = *(nextai[k]+1) - *nextai[k];
4975         aj    = buf_rj[k] + *nextai[k];
4976         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4977         bnzi += nlnk;
4978         nextrow[k]++; nextai[k]++;
4979       }
4980     }
4981     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4982 
4983     /* if free space is not available, make more free space */
4984     if (current_space->local_remaining<bnzi) {
4985       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4986       nspacedouble++;
4987     }
4988     /* copy data into free space, then initialize lnk */
4989     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4990     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4991 
4992     current_space->array           += bnzi;
4993     current_space->local_used      += bnzi;
4994     current_space->local_remaining -= bnzi;
4995 
4996     bi[i+1] = bi[i] + bnzi;
4997   }
4998 
4999   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5000 
5001   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5002   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5003   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5004 
5005   /* create symbolic parallel matrix B_mpi */
5006   /*---------------------------------------*/
5007   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5008   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5009   if (n==PETSC_DECIDE) {
5010     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5011   } else {
5012     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5013   }
5014   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5015   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5016   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5017   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5018   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5019 
5020   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5021   B_mpi->assembled    = PETSC_FALSE;
5022   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5023   merge->bi           = bi;
5024   merge->bj           = bj;
5025   merge->buf_ri       = buf_ri;
5026   merge->buf_rj       = buf_rj;
5027   merge->coi          = NULL;
5028   merge->coj          = NULL;
5029   merge->owners_co    = NULL;
5030 
5031   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5032 
5033   /* attach the supporting struct to B_mpi for reuse */
5034   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5035   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5036   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5037   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5038   *mpimat = B_mpi;
5039 
5040   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5041   PetscFunctionReturn(0);
5042 }
5043 
5044 /*@C
5045       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5046                  matrices from each processor
5047 
5048     Collective
5049 
5050    Input Parameters:
5051 +    comm - the communicator the parallel matrix will live on
5052 .    seqmat - the input sequential matrix, one per process
5053 .    m - number of local rows (or PETSC_DECIDE)
5054 .    n - number of local columns (or PETSC_DECIDE)
5055 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5056 
5057    Output Parameter:
5058 .    mpimat - the parallel matrix generated
5059 
5060     Level: advanced
5061 
5062    Notes:
5063      The dimensions of the sequential matrix on each process MUST be the same.
5064      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5065      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
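
   Example usage (a minimal sketch; it assumes every process holds an assembled MATSEQAIJ matrix seqmat with the same column dimension):
.vb
      Mat C;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      /* ... change the numerical values of seqmat, keeping its nonzero pattern, then update C in place ... */
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
.ve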
5066 @*/
5067 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5068 {
5069   PetscErrorCode ierr;
5070   PetscMPIInt    size;
5071 
5072   PetscFunctionBegin;
5073   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5074   if (size == 1) {
5075     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5076     if (scall == MAT_INITIAL_MATRIX) {
5077       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5078     } else {
5079       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5080     }
5081     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5082     PetscFunctionReturn(0);
5083   }
5084   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5085   if (scall == MAT_INITIAL_MATRIX) {
5086     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5087   }
5088   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5089   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
5092 
5093 /*@
5094      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5095           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5096           with MatGetSize()
5097 
5098     Not Collective
5099 
5100    Input Parameters:
5101 +    A - the matrix
5102 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5103 
5104    Output Parameter:
5105 .    A_loc - the local sequential matrix generated
5106 
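   Example usage (a minimal sketch; it assumes A is an assembled MATMPIAIJ matrix):
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      /* ... if the values of A change later but its nonzero pattern does not, refresh Aloc ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
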
5107     Level: developer
5108 
5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5110 
5111 @*/
5112 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5113 {
5114   PetscErrorCode ierr;
5115   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5116   Mat_SeqAIJ     *mat,*a,*b;
5117   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5118   MatScalar      *aa,*ba,*cam;
5119   PetscScalar    *ca;
5120   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5121   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5122   PetscBool      match;
5123   MPI_Comm       comm;
5124   PetscMPIInt    size;
5125 
5126   PetscFunctionBegin;
5127   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5128   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5129   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5131   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
5132 
5133   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5134   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5135   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5136   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5137   aa = a->a; ba = b->a;
5138   if (scall == MAT_INITIAL_MATRIX) {
5139     if (size == 1) {
5140       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
5141       PetscFunctionReturn(0);
5142     }
5143 
5144     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5145     ci[0] = 0;
5146     for (i=0; i<am; i++) {
5147       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5148     }
5149     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5150     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5151     k    = 0;
5152     for (i=0; i<am; i++) {
5153       ncols_o = bi[i+1] - bi[i];
5154       ncols_d = ai[i+1] - ai[i];
5155       /* off-diagonal portion of A */
5156       for (jo=0; jo<ncols_o; jo++) {
5157         col = cmap[*bj];
5158         if (col >= cstart) break;
5159         cj[k]   = col; bj++;
5160         ca[k++] = *ba++;
5161       }
5162       /* diagonal portion of A */
5163       for (j=0; j<ncols_d; j++) {
5164         cj[k]   = cstart + *aj++;
5165         ca[k++] = *aa++;
5166       }
5167       /* off-diagonal portion of A */
5168       for (j=jo; j<ncols_o; j++) {
5169         cj[k]   = cmap[*bj++];
5170         ca[k++] = *ba++;
5171       }
5172     }
5173     /* put together the new matrix */
5174     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5175     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5176     /* Since these are PETSc arrays, change flags to free them as necessary. */
5177     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5178     mat->free_a  = PETSC_TRUE;
5179     mat->free_ij = PETSC_TRUE;
5180     mat->nonew   = 0;
5181   } else if (scall == MAT_REUSE_MATRIX) {
5182     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5183     ci = mat->i; cj = mat->j; cam = mat->a;
5184     for (i=0; i<am; i++) {
5185       /* off-diagonal portion of A */
5186       ncols_o = bi[i+1] - bi[i];
5187       for (jo=0; jo<ncols_o; jo++) {
5188         col = cmap[*bj];
5189         if (col >= cstart) break;
5190         *cam++ = *ba++; bj++;
5191       }
5192       /* diagonal portion of A */
5193       ncols_d = ai[i+1] - ai[i];
5194       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5195       /* off-diagonal portion of A */
5196       for (j=jo; j<ncols_o; j++) {
5197         *cam++ = *ba++; bj++;
5198       }
5199     }
5200   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5201   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 /*@C
5206      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5207 
5208     Not Collective
5209 
5210    Input Parameters:
5211 +    A - the matrix
5212 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5213 -    row, col - index sets of rows and columns to extract (or NULL)
5214 
5215    Output Parameter:
5216 .    A_loc - the local sequential matrix generated
5217 
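   Example usage (a minimal sketch; it assumes A is an assembled MATMPIAIJ matrix and the default row and column selections are wanted):
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
      /* ... use Aloc ... */
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
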
5218     Level: developer
5219 
5220 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5221 
5222 @*/
5223 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5224 {
5225   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5226   PetscErrorCode ierr;
5227   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5228   IS             isrowa,iscola;
5229   Mat            *aloc;
5230   PetscBool      match;
5231 
5232   PetscFunctionBegin;
5233   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5234   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5235   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5236   if (!row) {
5237     start = A->rmap->rstart; end = A->rmap->rend;
5238     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5239   } else {
5240     isrowa = *row;
5241   }
5242   if (!col) {
5243     start = A->cmap->rstart;
5244     cmap  = a->garray;
5245     nzA   = a->A->cmap->n;
5246     nzB   = a->B->cmap->n;
5247     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5248     ncols = 0;
5249     for (i=0; i<nzB; i++) {
5250       if (cmap[i] < start) idx[ncols++] = cmap[i];
5251       else break;
5252     }
5253     imark = i;
5254     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5255     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5256     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5257   } else {
5258     iscola = *col;
5259   }
5260   if (scall != MAT_INITIAL_MATRIX) {
5261     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5262     aloc[0] = *A_loc;
5263   }
5264   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5265   if (!col) { /* attach global id of condensed columns */
5266     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5267   }
5268   *A_loc = aloc[0];
5269   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5270   if (!row) {
5271     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5272   }
5273   if (!col) {
5274     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5275   }
5276   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5277   PetscFunctionReturn(0);
5278 }
5279 
5280 /*
5281  * Destroy a matrix that may have PetscSF communication objects composed with it.
5282  * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private.
5283  * */
5284 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat)
5285 {
5286   PetscSF          sf,osf;
5287   IS               map;
5288   PetscErrorCode   ierr;
5289 
5290   PetscFunctionBegin;
5291   ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5292   ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5293   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5294   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5295   ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr);
5296   ierr = ISDestroy(&map);CHKERRQ(ierr);
5297   ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr);
5298   PetscFunctionReturn(0);
5299 }
5300 
5301 /*
5302  * Create a sequential AIJ matrix based on row indices: once a row is matched, the whole row (all of its columns) is extracted.
5303  * A row could be local or remote. The routine is designed to be memory scalable so that nothing is based
5304  * on a global size.
5305  * */
5306 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5307 {
5308   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5309   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5310   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol;
5311   PetscSFNode              *iremote,*oiremote;
5312   const PetscInt           *lrowindices;
5313   PetscErrorCode           ierr;
5314   PetscSF                  sf,osf;
5315   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5316   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5317   MPI_Comm                 comm;
5318   ISLocalToGlobalMapping   mapping;
5319 
5320   PetscFunctionBegin;
5321   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5322   /* plocalsize is the number of roots
5323    * nrows is the number of leaves
5324    * */
5325   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5326   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5327   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5328   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5329   for (i=0;i<nrows;i++) {
5330     /* Find a remote index and an owner for a row
5331      * The row could be local or remote
5332      * */
5333     owner = 0;
5334     lidx  = 0;
5335     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5336     iremote[i].index = lidx;
5337     iremote[i].rank  = owner;
5338   }
5339   /* Create SF to communicate how many nonzero columns for each row */
5340   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5341   /* SF will figure out the number of nonzero columns for each row, and their
5342    * offsets
5343    * */
5344   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5345   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5346   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5347 
5348   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5349   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5350   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5351   roffsets[0] = 0;
5352   roffsets[1] = 0;
5353   for (i=0;i<plocalsize;i++) {
5354     /* diag */
5355     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5356     /* off diag */
5357     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5358     /* compute offsets so that we know the relative location of each row */
5359     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5360     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5361   }
5362   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5363   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5364   /* 'r' means root, and 'l' means leaf */
5365   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5366   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5367   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5368   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5369   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5370   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5371   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5372   dntotalcols = 0;
5373   ontotalcols = 0;
5374   ncol = 0;
5375   for (i=0;i<nrows;i++) {
5376     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5377     ncol = PetscMax(pnnz[i],ncol);
5378     /* diag */
5379     dntotalcols += nlcols[i*2+0];
5380     /* off diag */
5381     ontotalcols += nlcols[i*2+1];
5382   }
5383   /* We do not need to figure out the exact number of columns
5384    * since all the calculations will be done by going through the raw data
5385    * */
5386   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5387   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5388   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5389   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5390   /* diag */
5391   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5392   /* off diag */
5393   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5394   /* diag */
5395   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5396   /* off diag */
5397   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5398   dntotalcols = 0;
5399   ontotalcols = 0;
5400   ntotalcols  = 0;
5401   for (i=0;i<nrows;i++) {
5402     owner = 0;
5403     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5404     /* Set iremote for diag matrix */
5405     for (j=0;j<nlcols[i*2+0];j++) {
5406       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5407       iremote[dntotalcols].rank    = owner;
5408       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5409       ilocal[dntotalcols++]        = ntotalcols++;
5410     }
5411     /* off diag */
5412     for (j=0;j<nlcols[i*2+1];j++) {
5413       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5414       oiremote[ontotalcols].rank    = owner;
5415       oilocal[ontotalcols++]        = ntotalcols++;
5416     }
5417   }
5418   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5419   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5420   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5421   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5422   /* P serves as the roots and P_oth as the leaves
5423    * Diag matrix
5424    * */
5425   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5426   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5427   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5428 
5429   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5430   /* Off diag */
5431   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5432   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5433   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5434   /* We operate on the matrix internal data for saving memory */
5435   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5436   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5437   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5438   /* Convert to global indices for diag matrix */
5439   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5440   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5441   /* We want P_oth to store global indices */
5442   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5443   /* Use memory scalable approach */
5444   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5445   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5446   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5447   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5448   /* Convert back to local indices */
5449   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5450   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5451   nout = 0;
5452   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5453   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5454   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5455   /* Exchange values */
5456   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5457   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5458   /* Stop PETSc from shrinking memory */
5459   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5460   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5461   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5462   /* Attach the PetscSF objects to P_oth so that we can reuse them later */
5463   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5464   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5465   /* The "new" MatDestroy takes care of the PetscSF objects as well */
5466   (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF;
5467   PetscFunctionReturn(0);
5468 }
5469 
5470 /*
5471  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5472  * This supports MPIAIJ and MAIJ matrices.
5473  * */
5474 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5475 {
5476   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5477   Mat_SeqAIJ            *p_oth;
5478   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5479   IS                    rows,map;
5480   PetscHMapI            hamp;
5481   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5482   MPI_Comm              comm;
5483   PetscSF               sf,osf;
5484   PetscBool             has;
5485   PetscErrorCode        ierr;
5486 
5487   PetscFunctionBegin;
5488   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5489   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5490   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5491    *  and then create a submatrix (that often is an overlapping matrix)
5492    * */
5493   if (reuse==MAT_INITIAL_MATRIX) {
5494     /* Use a hash table to figure out unique keys */
5495     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5496     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5497     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5498     count = 0;
5499     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5500     for (i=0;i<a->B->cmap->n;i++) {
5501       key  = a->garray[i]/dof;
5502       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5503       if (!has) {
5504         mapping[i] = count;
5505         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5506       } else {
5507         /* The current 'i' has the same key as the previous one */
5508         mapping[i] = count-1;
5509       }
5510     }
5511     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5512     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5513     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5514     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5515     off = 0;
5516     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5517     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5518     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5519     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5520     /* In case the matrix was already created but the user wants to recreate it */
5521     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5522     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5523     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5524     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5525   } else if (reuse==MAT_REUSE_MATRIX) {
5526     /* If the matrix was already created, we simply update the values using the SF objects
5527      * that were attached to the matrix earlier.
5528      *  */
5529     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5530     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5531     if (!sf || !osf) {
5532       SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5533     }
5534     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5535     /* Update values in place */
5536     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5537     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5538     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5539     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5540   } else {
5541     SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5542   }
5543   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5544   PetscFunctionReturn(0);
5545 }
5546 
5547 /*@C
5548     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5549 
5550     Collective on Mat
5551 
5552    Input Parameters:
5553 +    A,B - the matrices in mpiaij format
5554 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5555 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5556 
5557    Output Parameters:
5558 +    rowb, colb - index sets of rows and columns of B to extract
5559 -    B_seq - the sequential matrix generated
5560 
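   Example usage (a minimal sketch; it assumes A and B are assembled MATMPIAIJ matrices with compatible local layouts):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat Bseq = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      /* ... if the values of B change later but its nonzero pattern does not, reuse the index sets ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
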
5561     Level: developer
5562 
5563 @*/
5564 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5565 {
5566   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5567   PetscErrorCode ierr;
5568   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5569   IS             isrowb,iscolb;
5570   Mat            *bseq=NULL;
5571 
5572   PetscFunctionBegin;
5573   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5574     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5575   }
5576   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5577 
5578   if (scall == MAT_INITIAL_MATRIX) {
5579     start = A->cmap->rstart;
5580     cmap  = a->garray;
5581     nzA   = a->A->cmap->n;
5582     nzB   = a->B->cmap->n;
5583     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5584     ncols = 0;
5585     for (i=0; i<nzB; i++) {  /* row < local row index */
5586       if (cmap[i] < start) idx[ncols++] = cmap[i];
5587       else break;
5588     }
5589     imark = i;
5590     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5591     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5592     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5593     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5594   } else {
5595     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5596     isrowb  = *rowb; iscolb = *colb;
5597     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5598     bseq[0] = *B_seq;
5599   }
5600   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5601   *B_seq = bseq[0];
5602   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5603   if (!rowb) {
5604     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5605   } else {
5606     *rowb = isrowb;
5607   }
5608   if (!colb) {
5609     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5610   } else {
5611     *colb = iscolb;
5612   }
5613   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5614   PetscFunctionReturn(0);
5615 }
5616 
5617 /*
5618     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5619     of the OFF-DIAGONAL portion of local A
5620 
5621     Collective on Mat
5622 
5623    Input Parameters:
5624 +    A,B - the matrices in mpiaij format
5625 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5626 
5627    Output Parameters:
5628 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5629 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5630 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5631 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5632 
5633     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5634      for this matrix. This is not desirable.
5635 
5636     Level: developer
5637 
5638 */
5639 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5640 {
5641   PetscErrorCode         ierr;
5642   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5643   Mat_SeqAIJ             *b_oth;
5644   VecScatter             ctx;
5645   MPI_Comm               comm;
5646   const PetscMPIInt      *rprocs,*sprocs;
5647   const PetscInt         *srow,*rstarts,*sstarts;
5648   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5649   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
5650   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5651   MPI_Request            *rwaits = NULL,*swaits = NULL;
5652   MPI_Status             rstatus;
5653   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5654 
5655   PetscFunctionBegin;
5656   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5658 
5659   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5660     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5661   }
5662   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5664 
5665   if (size == 1) {
5666     startsj_s = NULL;
5667     bufa_ptr  = NULL;
5668     *B_oth    = NULL;
5669     PetscFunctionReturn(0);
5670   }
5671 
5672   ctx = a->Mvctx;
5673   tag = ((PetscObject)ctx)->tag;
5674 
5675   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5676   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5677   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5678   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5679   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5680   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5681   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5682 
5683   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5684   if (scall == MAT_INITIAL_MATRIX) {
5685     /* i-array */
5686     /*---------*/
5687     /*  post receives */
5688     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5689     for (i=0; i<nrecvs; i++) {
5690       rowlen = rvalues + rstarts[i]*rbs;
5691       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5692       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5693     }
5694 
5695     /* pack the outgoing message */
5696     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5697 
5698     sstartsj[0] = 0;
5699     rstartsj[0] = 0;
5700     len         = 0; /* total length of j or a array to be sent */
5701     if (nsends) {
5702       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5703       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5704     }
5705     for (i=0; i<nsends; i++) {
5706       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5707       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5708       for (j=0; j<nrows; j++) {
5709         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5710         for (l=0; l<sbs; l++) {
5711           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5712 
5713           rowlen[j*sbs+l] = ncols;
5714 
5715           len += ncols;
5716           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5717         }
5718         k++;
5719       }
5720       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5721 
5722       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5723     }
5724     /* recvs and sends of i-array are completed */
5725     i = nrecvs;
5726     while (i--) {
5727       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5728     }
5729     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5730     ierr = PetscFree(svalues);CHKERRQ(ierr);
5731 
5732     /* allocate buffers for sending j and a arrays */
5733     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5734     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5735 
5736     /* create i-array of B_oth */
5737     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5738 
5739     b_othi[0] = 0;
5740     len       = 0; /* total length of j or a array to be received */
5741     k         = 0;
5742     for (i=0; i<nrecvs; i++) {
5743       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5744       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5745       for (j=0; j<nrows; j++) {
5746         b_othi[k+1] = b_othi[k] + rowlen[j];
5747         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5748         k++;
5749       }
5750       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5751     }
5752     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5753 
5754     /* allocate space for j and a arrays of B_oth */
5755     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5756     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5757 
5758     /* j-array */
5759     /*---------*/
5760     /*  post receives of j-array */
5761     for (i=0; i<nrecvs; i++) {
5762       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5763       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5764     }
5765 
5766     /* pack the outgoing message j-array */
5767     if (nsends) k = sstarts[0];
5768     for (i=0; i<nsends; i++) {
5769       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5770       bufJ  = bufj+sstartsj[i];
5771       for (j=0; j<nrows; j++) {
5772         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5773         for (ll=0; ll<sbs; ll++) {
5774           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5775           for (l=0; l<ncols; l++) {
5776             *bufJ++ = cols[l];
5777           }
5778           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5779         }
5780       }
5781       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5782     }
5783 
5784     /* recvs and sends of j-array are completed */
5785     i = nrecvs;
5786     while (i--) {
5787       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5788     }
5789     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5790   } else if (scall == MAT_REUSE_MATRIX) {
5791     sstartsj = *startsj_s;
5792     rstartsj = *startsj_r;
5793     bufa     = *bufa_ptr;
5794     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5795     b_otha   = b_oth->a;
5796   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix does not possess an object container");
5797 
5798   /* a-array */
5799   /*---------*/
5800   /*  post receives of a-array */
5801   for (i=0; i<nrecvs; i++) {
5802     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5803     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5804   }
5805 
5806   /* pack the outgoing message a-array */
5807   if (nsends) k = sstarts[0];
5808   for (i=0; i<nsends; i++) {
5809     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5810     bufA  = bufa+sstartsj[i];
5811     for (j=0; j<nrows; j++) {
5812       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5813       for (ll=0; ll<sbs; ll++) {
5814         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5815         for (l=0; l<ncols; l++) {
5816           *bufA++ = vals[l];
5817         }
5818         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5819       }
5820     }
5821     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5822   }
5823   /* recvs and sends of a-array are completed */
5824   i = nrecvs;
5825   while (i--) {
5826     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5827   }
5828   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5829   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5830 
5831   if (scall == MAT_INITIAL_MATRIX) {
5832     /* put together the new matrix */
5833     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5834 
5835     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5836     /* Since these are PETSc arrays, change flags to free them as necessary. */
5837     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5838     b_oth->free_a  = PETSC_TRUE;
5839     b_oth->free_ij = PETSC_TRUE;
5840     b_oth->nonew   = 0;
5841 
5842     ierr = PetscFree(bufj);CHKERRQ(ierr);
5843     if (!startsj_s || !bufa_ptr) {
5844       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5845       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5846     } else {
5847       *startsj_s = sstartsj;
5848       *startsj_r = rstartsj;
5849       *bufa_ptr  = bufa;
5850     }
5851   }
5852 
5853   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5854   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5855   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5856   PetscFunctionReturn(0);
5857 }
5858 
5859 /*@C
5860   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5861 
5862   Not Collective
5863 
5864   Input Parameter:
5865 . A - The matrix in mpiaij format
5866 
5867   Output Parameters:
5868 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5869 . colmap - A map from global column index to local index into lvec
5870 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5871 
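  Example usage (a minimal sketch; it assumes A is an assembled MATMPIAIJ matrix; the returned objects are owned by A and must not be destroyed by the caller):
.vb
      Vec        lvec;
      VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
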
5872   Level: developer
5873 
5874 @*/
5875 #if defined(PETSC_USE_CTABLE)
5876 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5877 #else
5878 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5879 #endif
5880 {
5881   Mat_MPIAIJ *a;
5882 
5883   PetscFunctionBegin;
5884   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5885   PetscValidPointer(lvec, 2);
5886   PetscValidPointer(colmap, 3);
5887   PetscValidPointer(multScatter, 4);
5888   a = (Mat_MPIAIJ*) A->data;
5889   if (lvec) *lvec = a->lvec;
5890   if (colmap) *colmap = a->colmap;
5891   if (multScatter) *multScatter = a->Mvctx;
5892   PetscFunctionReturn(0);
5893 }
5894 
5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5898 #if defined(PETSC_HAVE_MKL_SPARSE)
5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5900 #endif
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5902 #if defined(PETSC_HAVE_ELEMENTAL)
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5904 #endif
5905 #if defined(PETSC_HAVE_HYPRE)
5906 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5907 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5908 #endif
5909 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5911 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
5912 
5913 /*
5914     Computes (B'*A')' since computing B*A directly is untenable
5915 
5916                n                       p                          p
5917         (              )       (              )         (                  )
5918       m (      A       )  *  n (       B      )   =   m (         C        )
5919         (              )       (              )         (                  )
5920 
5921 */
5922 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5923 {
5924   PetscErrorCode ierr;
5925   Mat            At,Bt,Ct;
5926 
5927   PetscFunctionBegin;
5928   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5929   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5930   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5931   ierr = MatDestroy(&At);CHKERRQ(ierr);
5932   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5933   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5934   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5935   PetscFunctionReturn(0);
5936 }
5937 
5938 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5939 {
5940   PetscErrorCode ierr;
5941   PetscInt       m=A->rmap->n,n=B->cmap->n;
5942   Mat            Cmat;
5943 
5944   PetscFunctionBegin;
5945   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5946   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5947   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5948   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5949   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5950   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5951   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5952   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5953 
5954   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5955 
5956   *C = Cmat;
5957   PetscFunctionReturn(0);
5958 }
5959 
5960 /* ----------------------------------------------------------------*/
5961 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5962 {
5963   PetscErrorCode ierr;
5964 
5965   PetscFunctionBegin;
5966   if (scall == MAT_INITIAL_MATRIX) {
5967     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5968     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5969     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5970   }
5971   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5972   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5973   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5974   PetscFunctionReturn(0);
5975 }
5976 
5977 /*MC
5978    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5979 
5980    Options Database Keys:
5981 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5982 
5983    Level: beginner
5984 
5985    Notes:
5986     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
5987     in this case the values associated with the rows and columns one passes in are set to zero
5988     in the matrix.
5989 
5990     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
5991     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
5992 
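    A typical creation sequence (a minimal sketch; comm, m, n, M, N, d_nz, and o_nz are assumed to be supplied by the caller):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
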
5993 .seealso: MatCreateAIJ()
5994 M*/
5995 
5996 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5997 {
5998   Mat_MPIAIJ     *b;
5999   PetscErrorCode ierr;
6000   PetscMPIInt    size;
6001 
6002   PetscFunctionBegin;
6003   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6004 
6005   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6006   B->data       = (void*)b;
6007   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6008   B->assembled  = PETSC_FALSE;
6009   B->insertmode = NOT_SET_VALUES;
6010   b->size       = size;
6011 
6012   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6013 
6014   /* build cache for off-process entries formed during assembly */
6015   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6016 
6017   b->donotstash  = PETSC_FALSE;
6018   b->colmap      = 0;
6019   b->garray      = 0;
6020   b->roworiented = PETSC_TRUE;
6021 
6022   /* stuff used for matrix vector multiply */
6023   b->lvec  = NULL;
6024   b->Mvctx = NULL;
6025 
6026   /* stuff for MatGetRow() */
6027   b->rowindices   = 0;
6028   b->rowvalues    = 0;
6029   b->getrowactive = PETSC_FALSE;
6030 
6031   /* flexible pointer used in CUSP/CUSPARSE classes */
6032   b->spptr = NULL;
6033 
6034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6044 #if defined(PETSC_HAVE_MKL_SPARSE)
6045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6046 #endif
6047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6049 #if defined(PETSC_HAVE_ELEMENTAL)
6050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6051 #endif
6052 #if defined(PETSC_HAVE_HYPRE)
6053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6054 #endif
6055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6060 #if defined(PETSC_HAVE_HYPRE)
6061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6062 #endif
6063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
6064   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6065   PetscFunctionReturn(0);
6066 }
6067 
6068 /*@C
6069      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6070          and "off-diagonal" part of the matrix in CSR format.
6071 
6072    Collective
6073 
6074    Input Parameters:
6075 +  comm - MPI communicator
6076 .  m - number of local rows (Cannot be PETSC_DECIDE)
6077 .  n - This value should be the same as the local size used in creating the
6078        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6079        calculated if N is given). For square matrices n is almost always m.
6080 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6081 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6082 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6083 .   j - column indices
6084 .   a - matrix values
6085 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6086 .   oj - column indices
6087 -   oa - matrix values
6088 
6089    Output Parameter:
6090 .   mat - the matrix
6091 
6092    Level: advanced
6093 
6094    Notes:
6095        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6096        must free the arrays once the matrix has been destroyed and not before.
6097 
6098        The i and j indices are 0 based
6099 
6100        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6101 
6102        This sets local rows and cannot be used to set off-processor values.
6103 
6104        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6105        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6106        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6107        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6108        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6109        communication if it is known that only local entries will be set.
6110 
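   Example call (a minimal sketch; i, j, a, oi, oj, and oa are assumed to be user-owned, 0-based CSR arrays for the "diagonal" and "off-diagonal" blocks that remain valid until the matrix is destroyed):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
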
6111 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6112           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6113 @*/
6114 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6115 {
6116   PetscErrorCode ierr;
6117   Mat_MPIAIJ     *maij;
6118 
6119   PetscFunctionBegin;
6120   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6121   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6122   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6123   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6124   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6125   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6126   maij = (Mat_MPIAIJ*) (*mat)->data;
6127 
6128   (*mat)->preallocated = PETSC_TRUE;
6129 
6130   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6131   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6132 
6133   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6134   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6135 
6136   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6137   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6138   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6139   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6140 
6141   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6142   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6143   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6144   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6145   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6146   PetscFunctionReturn(0);
6147 }
6148 
6149 /*
6150     Special version for direct calls from Fortran
6151 */
6152 #include <petsc/private/fortranimpl.h>
6153 
6154 /* Change these macros so can be used in void function */
6155 #undef CHKERRQ
6156 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6157 #undef SETERRQ2
6158 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6159 #undef SETERRQ3
6160 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6161 #undef SETERRQ
6162 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6163 
6164 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6165 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6166 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6167 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6168 #else
6169 #endif
6170 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6171 {
6172   Mat            mat  = *mmat;
6173   PetscInt       m    = *mm, n = *mn;
6174   InsertMode     addv = *maddv;
6175   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6176   PetscScalar    value;
6177   PetscErrorCode ierr;
6178 
6179   MatCheckPreallocated(mat,1);
6180   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6181 
6182 #if defined(PETSC_USE_DEBUG)
6183   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6184 #endif
6185   {
6186     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6187     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6188     PetscBool roworiented = aij->roworiented;
6189 
6190     /* Some Variables required in the macro */
6191     Mat        A                 = aij->A;
6192     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6193     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6194     MatScalar  *aa               = a->a;
6195     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6196     Mat        B                 = aij->B;
6197     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6198     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6199     MatScalar  *ba               = b->a;
6200 
6201     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6202     PetscInt  nonew = a->nonew;
6203     MatScalar *ap1,*ap2;
6204 
6205     PetscFunctionBegin;
6206     for (i=0; i<m; i++) {
6207       if (im[i] < 0) continue;
6208 #if defined(PETSC_USE_DEBUG)
6209       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6210 #endif
6211       if (im[i] >= rstart && im[i] < rend) {
6212         row      = im[i] - rstart;
6213         lastcol1 = -1;
6214         rp1      = aj + ai[row];
6215         ap1      = aa + ai[row];
6216         rmax1    = aimax[row];
6217         nrow1    = ailen[row];
6218         low1     = 0;
6219         high1    = nrow1;
6220         lastcol2 = -1;
6221         rp2      = bj + bi[row];
6222         ap2      = ba + bi[row];
6223         rmax2    = bimax[row];
6224         nrow2    = bilen[row];
6225         low2     = 0;
6226         high2    = nrow2;
6227 
6228         for (j=0; j<n; j++) {
6229           if (roworiented) value = v[i*n+j];
6230           else value = v[i+j*m];
6231           if (in[j] >= cstart && in[j] < cend) {
6232             col = in[j] - cstart;
6233             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6234             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6235           } else if (in[j] < 0) continue;
6236 #if defined(PETSC_USE_DEBUG)
6237           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6238           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
6239 #endif
6240           else {
6241             if (mat->was_assembled) {
6242               if (!aij->colmap) {
6243                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6244               }
6245 #if defined(PETSC_USE_CTABLE)
6246               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6247               col--;
6248 #else
6249               col = aij->colmap[in[j]] - 1;
6250 #endif
6251               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
6252               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6253                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6254                 col  =  in[j];
6255                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6256                 B     = aij->B;
6257                 b     = (Mat_SeqAIJ*)B->data;
6258                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6259                 rp2   = bj + bi[row];
6260                 ap2   = ba + bi[row];
6261                 rmax2 = bimax[row];
6262                 nrow2 = bilen[row];
6263                 low2  = 0;
6264                 high2 = nrow2;
6265                 bm    = aij->B->rmap->n;
6266                 ba    = b->a;
6267               }
6268             } else col = in[j];
6269             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6270           }
6271         }
6272       } else if (!aij->donotstash) {
6273         if (roworiented) {
6274           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6275         } else {
6276           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6277         }
6278       }
6279     }
6280   }
6281   PetscFunctionReturnVoid();
6282 }
6283