xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision efbe7e8a80d07327753dbe0b33efee01e046af3f)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine number diagonal and off-diagonal counts */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine number diagonal and off-diagonal counts */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0*/
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it it is not scalable (each processor
425 has an order N integer array but is fast to acess.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure LogFlops will slow dow the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
796 
797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802 
803   PetscFunctionBegin;
804   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
805 
806   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
807   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
808   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
813 {
814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
815   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
816   PetscErrorCode ierr;
817   PetscMPIInt    n;
818   PetscInt       i,j,rstart,ncols,flg;
819   PetscInt       *row,*col;
820   PetscBool      other_disassembled;
821   PetscScalar    *val;
822 
823   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
824 
825   PetscFunctionBegin;
826   if (!aij->donotstash && !mat->nooffprocentries) {
827     while (1) {
828       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for (i=0; i<n;) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for (j=i,rstart=row[j]; j<n; j++) {
834           if (row[j] != rstart) break;
835         }
836         if (j < n) ncols = j-i;
837         else       ncols = n-i;
838         /* Now assemble all these values with a single function call */
839         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
840         i    = j;
841       }
842     }
843     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
844   }
845 #if defined(PETSC_HAVE_DEVICE)
846   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
847   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
848   if (mat->boundtocpu) {
849     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
850     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
851   }
852 #endif
853   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
855 
856   /* determine if any processor has disassembled, if so we must
857      also disassemble ourself, in order that we may reassemble. */
858   /*
859      if nonzero structure of submatrix B cannot change then we know that
860      no processor disassembled thus we can skip this stuff
861   */
862   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
863     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
864     if (mat->was_assembled && !other_disassembled) {
865 #if defined(PETSC_HAVE_DEVICE)
866       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
867 #endif
868       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
869     }
870   }
871   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
872     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
873   }
874   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
875 #if defined(PETSC_HAVE_DEVICE)
876   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
877 #endif
878   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
879   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
880 
881   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
882 
883   aij->rowvalues = NULL;
884 
885   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
886   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
890     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
891     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
892   }
893 #if defined(PETSC_HAVE_DEVICE)
894   mat->offloadmask = PETSC_OFFLOAD_BOTH;
895 #endif
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
900 {
901   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
906   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
911 {
912   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
913   PetscObjectState sA, sB;
914   PetscInt        *lrows;
915   PetscInt         r, len;
916   PetscBool        cong, lch, gch;
917   PetscErrorCode   ierr;
918 
919   PetscFunctionBegin;
920   /* get locally owned rows */
921   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
922   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
923   /* fix right hand side if needed */
924   if (x && b) {
925     const PetscScalar *xx;
926     PetscScalar       *bb;
927 
928     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
929     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
930     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
931     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
932     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
933     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
934   }
935 
936   sA = mat->A->nonzerostate;
937   sB = mat->B->nonzerostate;
938 
939   if (diag != 0.0 && cong) {
940     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
941     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
942   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
943     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
944     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
945     PetscInt   nnwA, nnwB;
946     PetscBool  nnzA, nnzB;
947 
948     nnwA = aijA->nonew;
949     nnwB = aijB->nonew;
950     nnzA = aijA->keepnonzeropattern;
951     nnzB = aijB->keepnonzeropattern;
952     if (!nnzA) {
953       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
954       aijA->nonew = 0;
955     }
956     if (!nnzB) {
957       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
958       aijB->nonew = 0;
959     }
960     /* Must zero here before the next loop */
961     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
962     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
963     for (r = 0; r < len; ++r) {
964       const PetscInt row = lrows[r] + A->rmap->rstart;
965       if (row >= A->cmap->N) continue;
966       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
967     }
968     aijA->nonew = nnwA;
969     aijB->nonew = nnwB;
970   } else {
971     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
972     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(lrows);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977 
978   /* reduce nonzerostate */
979   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
980   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
981   if (gch) A->nonzerostate++;
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
986 {
987   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscMPIInt       n = A->rmap->n;
990   PetscInt          i,j,r,m,len = 0;
991   PetscInt          *lrows,*owners = A->rmap->range;
992   PetscMPIInt       p = 0;
993   PetscSFNode       *rrows;
994   PetscSF           sf;
995   const PetscScalar *xx;
996   PetscScalar       *bb,*mask;
997   Vec               xmask,lmask;
998   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
999   const PetscInt    *aj, *ii,*ridx;
1000   PetscScalar       *aa;
1001 
1002   PetscFunctionBegin;
1003   /* Create SF where leaves are input rows and roots are owned rows */
1004   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1005   for (r = 0; r < n; ++r) lrows[r] = -1;
1006   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1007   for (r = 0; r < N; ++r) {
1008     const PetscInt idx   = rows[r];
1009     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1010     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1011       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1012     }
1013     rrows[r].rank  = p;
1014     rrows[r].index = rows[r] - owners[p];
1015   }
1016   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1017   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1018   /* Collect flags for rows to be zeroed */
1019   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1020   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1021   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1022   /* Compress and put in row numbers */
1023   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1024   /* zero diagonal part of matrix */
1025   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1026   /* handle off diagonal part of matrix */
1027   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1028   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1029   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1030   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1031   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1035   if (x && b) { /* this code is buggy when the row and column layout don't match */
1036     PetscBool cong;
1037 
1038     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1039     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1040     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1043     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1044   }
1045   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1046   /* remove zeroed rows of off diagonal matrix */
1047   ii = aij->i;
1048   for (i=0; i<len; i++) {
1049     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1050   }
1051   /* loop over all elements of off process part of matrix zeroing removed columns*/
1052   if (aij->compressedrow.use) {
1053     m    = aij->compressedrow.nrows;
1054     ii   = aij->compressedrow.i;
1055     ridx = aij->compressedrow.rindex;
1056     for (i=0; i<m; i++) {
1057       n  = ii[i+1] - ii[i];
1058       aj = aij->j + ii[i];
1059       aa = aij->a + ii[i];
1060 
1061       for (j=0; j<n; j++) {
1062         if (PetscAbsScalar(mask[*aj])) {
1063           if (b) bb[*ridx] -= *aa*xx[*aj];
1064           *aa = 0.0;
1065         }
1066         aa++;
1067         aj++;
1068       }
1069       ridx++;
1070     }
1071   } else { /* do not use compressed row format */
1072     m = l->B->rmap->n;
1073     for (i=0; i<m; i++) {
1074       n  = ii[i+1] - ii[i];
1075       aj = aij->j + ii[i];
1076       aa = aij->a + ii[i];
1077       for (j=0; j<n; j++) {
1078         if (PetscAbsScalar(mask[*aj])) {
1079           if (b) bb[i] -= *aa*xx[*aj];
1080           *aa = 0.0;
1081         }
1082         aa++;
1083         aj++;
1084       }
1085     }
1086   }
1087   if (x && b) {
1088     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1089     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1090   }
1091   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1092   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1093   ierr = PetscFree(lrows);CHKERRQ(ierr);
1094 
1095   /* only change matrix nonzero state if pattern was allowed to be changed */
1096   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1097     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1098     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107   PetscInt       nt;
1108   VecScatter     Mvctx = a->Mvctx;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1112   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1113   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1131 {
1132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   VecScatter     Mvctx = a->Mvctx;
1135 
1136   PetscFunctionBegin;
1137   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1138   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   /* do nondiagonal part */
1152   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1153   /* do local part */
1154   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1155   /* add partial results together */
1156   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1157   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1162 {
1163   MPI_Comm       comm;
1164   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1165   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1166   IS             Me,Notme;
1167   PetscErrorCode ierr;
1168   PetscInt       M,N,first,last,*notme,i;
1169   PetscBool      lf;
1170   PetscMPIInt    size;
1171 
1172   PetscFunctionBegin;
1173   /* Easy test: symmetric diagonal block */
1174   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1175   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1176   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1177   if (!*f) PetscFunctionReturn(0);
1178   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1180   if (size == 1) PetscFunctionReturn(0);
1181 
1182   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1183   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1184   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1185   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1186   for (i=0; i<first; i++) notme[i] = i;
1187   for (i=last; i<M; i++) notme[i-last+first] = i;
1188   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1189   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1190   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1191   Aoff = Aoffs[0];
1192   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1193   Boff = Boffs[0];
1194   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1195   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1196   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1197   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1198   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1199   ierr = PetscFree(notme);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1204 {
1205   PetscErrorCode ierr;
1206 
1207   PetscFunctionBegin;
1208   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1213 {
1214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218   /* do nondiagonal part */
1219   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1220   /* do local part */
1221   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1222   /* add partial results together */
1223   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1233 {
1234   PetscErrorCode ierr;
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236 
1237   PetscFunctionBegin;
1238   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1239   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1245 {
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1251   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1256 {
1257   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261 #if defined(PETSC_USE_LOG)
1262   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1263 #endif
1264   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1266   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1267   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1268 #if defined(PETSC_USE_CTABLE)
1269   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1270 #else
1271   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1272 #endif
1273   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1274   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1275   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1276   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1277   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1278   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1279   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1280 
1281   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1282   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1311 {
1312   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1314   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1315   const PetscInt    *garray = aij->garray;
1316   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1317   PetscInt          *rowlens;
1318   PetscInt          *colidxs;
1319   PetscScalar       *matvals;
1320   PetscErrorCode    ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1324 
1325   M  = mat->rmap->N;
1326   N  = mat->cmap->N;
1327   m  = mat->rmap->n;
1328   rs = mat->rmap->rstart;
1329   cs = mat->cmap->rstart;
1330   nz = A->nz + B->nz;
1331 
1332   /* write matrix header */
1333   header[0] = MAT_FILE_CLASSID;
1334   header[1] = M; header[2] = N; header[3] = nz;
1335   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1336   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1337 
1338   /* fill in and store row lengths  */
1339   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1340   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1341   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1342   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1343 
1344   /* fill in and store column indices */
1345   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1346   for (cnt=0, i=0; i<m; i++) {
1347     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1348       if (garray[B->j[jb]] > cs) break;
1349       colidxs[cnt++] = garray[B->j[jb]];
1350     }
1351     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1352       colidxs[cnt++] = A->j[ja] + cs;
1353     for (; jb<B->i[i+1]; jb++)
1354       colidxs[cnt++] = garray[B->j[jb]];
1355   }
1356   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1357   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1358   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1359 
1360   /* fill in and store nonzero values */
1361   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1362   for (cnt=0, i=0; i<m; i++) {
1363     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1364       if (garray[B->j[jb]] > cs) break;
1365       matvals[cnt++] = B->a[jb];
1366     }
1367     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1368       matvals[cnt++] = A->a[ja];
1369     for (; jb<B->i[i+1]; jb++)
1370       matvals[cnt++] = B->a[jb];
1371   }
1372   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1373   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1374   ierr = PetscFree(matvals);CHKERRQ(ierr);
1375 
1376   /* write block size option to the viewer's .info file */
1377   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #include <petscdraw.h>
1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1383 {
1384   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1385   PetscErrorCode    ierr;
1386   PetscMPIInt       rank = aij->rank,size = aij->size;
1387   PetscBool         isdraw,iascii,isbinary;
1388   PetscViewer       sviewer;
1389   PetscViewerFormat format;
1390 
1391   PetscFunctionBegin;
1392   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1393   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1394   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1395   if (iascii) {
1396     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1397     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1398       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1399       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1400       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1401       for (i=0; i<(PetscInt)size; i++) {
1402         nmax = PetscMax(nmax,nz[i]);
1403         nmin = PetscMin(nmin,nz[i]);
1404         navg += nz[i];
1405       }
1406       ierr = PetscFree(nz);CHKERRQ(ierr);
1407       navg = navg/size;
1408       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1409       PetscFunctionReturn(0);
1410     }
1411     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1412     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1413       MatInfo   info;
1414       PetscBool inodes;
1415 
1416       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1417       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1418       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1419       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1420       if (!inodes) {
1421         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1422                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1423       } else {
1424         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1425                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1426       }
1427       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1429       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1432       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1434       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1435       PetscFunctionReturn(0);
1436     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1437       PetscInt inodecount,inodelimit,*inodes;
1438       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1439       if (inodes) {
1440         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1441       } else {
1442         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1443       }
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1446       PetscFunctionReturn(0);
1447     }
1448   } else if (isbinary) {
1449     if (size == 1) {
1450       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1451       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1452     } else {
1453       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1454     }
1455     PetscFunctionReturn(0);
1456   } else if (iascii && size == 1) {
1457     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1458     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1459     PetscFunctionReturn(0);
1460   } else if (isdraw) {
1461     PetscDraw draw;
1462     PetscBool isnull;
1463     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1464     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1465     if (isnull) PetscFunctionReturn(0);
1466   }
1467 
1468   { /* assemble the entire matrix onto first processor */
1469     Mat A = NULL, Av;
1470     IS  isrow,iscol;
1471 
1472     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1473     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1474     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1475     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1476 /*  The commented code uses MatCreateSubMatrices instead */
1477 /*
1478     Mat *AA, A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1484     if (!rank) {
1485        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1486        A    = AA[0];
1487        Av   = AA[0];
1488     }
1489     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1490 */
1491     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1492     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1493     /*
1494        Everyone has to call to draw the matrix since the graphics waits are
1495        synchronized across all processors that share the PetscDraw object
1496     */
1497     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1498     if (!rank) {
1499       if (((PetscObject)mat)->name) {
1500         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1501       }
1502       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1503     }
1504     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1505     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1506     ierr = MatDestroy(&A);CHKERRQ(ierr);
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1512 {
1513   PetscErrorCode ierr;
1514   PetscBool      iascii,isdraw,issocket,isbinary;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1518   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1519   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1520   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1521   if (iascii || isdraw || isbinary || issocket) {
1522     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1528 {
1529   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1530   PetscErrorCode ierr;
1531   Vec            bb1 = NULL;
1532   PetscBool      hasop;
1533 
1534   PetscFunctionBegin;
1535   if (flag == SOR_APPLY_UPPER) {
1536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537     PetscFunctionReturn(0);
1538   }
1539 
1540   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1541     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1542   }
1543 
1544   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1545     if (flag & SOR_ZERO_INITIAL_GUESS) {
1546       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1547       its--;
1548     }
1549 
1550     while (its--) {
1551       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1553 
1554       /* update rhs: bb1 = bb - B*x */
1555       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1556       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1557 
1558       /* local sweep */
1559       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1560     }
1561   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1562     if (flag & SOR_ZERO_INITIAL_GUESS) {
1563       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1564       its--;
1565     }
1566     while (its--) {
1567       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570       /* update rhs: bb1 = bb - B*x */
1571       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1572       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1573 
1574       /* local sweep */
1575       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1576     }
1577   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1578     if (flag & SOR_ZERO_INITIAL_GUESS) {
1579       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1580       its--;
1581     }
1582     while (its--) {
1583       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585 
1586       /* update rhs: bb1 = bb - B*x */
1587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1589 
1590       /* local sweep */
1591       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1592     }
1593   } else if (flag & SOR_EISENSTAT) {
1594     Vec xx1;
1595 
1596     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1598 
1599     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1600     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1601     if (!mat->diag) {
1602       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1604     }
1605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1606     if (hasop) {
1607       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1608     } else {
1609       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1610     }
1611     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1612 
1613     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1614 
1615     /* local sweep */
1616     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1617     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1618     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1619   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1620 
1621   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1622 
1623   matin->factorerrortype = mat->A->factorerrortype;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1628 {
1629   Mat            aA,aB,Aperm;
1630   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1631   PetscScalar    *aa,*ba;
1632   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1633   PetscSF        rowsf,sf;
1634   IS             parcolp = NULL;
1635   PetscBool      done;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1640   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1641   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1642   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1643 
1644   /* Invert row permutation to find out where my rows should go */
1645   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1646   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1647   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1648   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1649   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1650   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1651 
1652   /* Invert column permutation to find out where my columns should go */
1653   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1654   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1655   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1656   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1657   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1658   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1660 
1661   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1662   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1663   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1664 
1665   /* Find out where my gcols should go */
1666   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1667   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1668   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1669   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1670   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1671   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1672   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1676   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1677   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     PetscInt    row = rdest[i];
1680     PetscMPIInt rowner;
1681     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1682     for (j=ai[i]; j<ai[i+1]; j++) {
1683       PetscInt    col = cdest[aj[j]];
1684       PetscMPIInt cowner;
1685       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1686       if (rowner == cowner) dnnz[i]++;
1687       else onnz[i]++;
1688     }
1689     for (j=bi[i]; j<bi[i+1]; j++) {
1690       PetscInt    col = gcdest[bj[j]];
1691       PetscMPIInt cowner;
1692       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1693       if (rowner == cowner) dnnz[i]++;
1694       else onnz[i]++;
1695     }
1696   }
1697   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1698   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1699   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1700   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1701   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1702 
1703   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1704   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1705   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1706   for (i=0; i<m; i++) {
1707     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1708     PetscInt j0,rowlen;
1709     rowlen = ai[i+1] - ai[i];
1710     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1711       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1712       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     rowlen = bi[i+1] - bi[i];
1715     for (j0=j=0; j<rowlen; j0=j) {
1716       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1717       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1718     }
1719   }
1720   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1723   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1724   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1725   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1726   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1727   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1728   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1729   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1730   *B = Aperm;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1735 {
1736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1741   if (ghosts) *ghosts = aij->garray;
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1746 {
1747   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1748   Mat            A    = mat->A,B = mat->B;
1749   PetscErrorCode ierr;
1750   PetscLogDouble isend[5],irecv[5];
1751 
1752   PetscFunctionBegin;
1753   info->block_size = 1.0;
1754   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1755 
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758 
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760 
1761   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1762   isend[3] += info->memory;  isend[4] += info->mallocs;
1763   if (flag == MAT_LOCAL) {
1764     info->nz_used      = isend[0];
1765     info->nz_allocated = isend[1];
1766     info->nz_unneeded  = isend[2];
1767     info->memory       = isend[3];
1768     info->mallocs      = isend[4];
1769   } else if (flag == MAT_GLOBAL_MAX) {
1770     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1771 
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1779 
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     MatCheckPreallocated(A,1);
1807     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1808     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1809     break;
1810   case MAT_ROW_ORIENTED:
1811     MatCheckPreallocated(A,1);
1812     a->roworiented = flg;
1813 
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818   case MAT_SORTED_FULL:
1819     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1820     break;
1821   case MAT_IGNORE_OFF_PROC_ENTRIES:
1822     a->donotstash = flg;
1823     break;
1824   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1825   case MAT_SPD:
1826   case MAT_SYMMETRIC:
1827   case MAT_STRUCTURALLY_SYMMETRIC:
1828   case MAT_HERMITIAN:
1829   case MAT_SYMMETRY_ETERNAL:
1830     break;
1831   case MAT_SUBMAT_SINGLEIS:
1832     A->submat_singleis = flg;
1833     break;
1834   case MAT_STRUCTURE_ONLY:
1835     /* The option is handled directly by MatSetOption() */
1836     break;
1837   default:
1838     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1844 {
1845   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1846   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1847   PetscErrorCode ierr;
1848   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1849   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1850   PetscInt       *cmap,*idx_p;
1851 
1852   PetscFunctionBegin;
1853   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1854   mat->getrowactive = PETSC_TRUE;
1855 
1856   if (!mat->rowvalues && (idx || v)) {
1857     /*
1858         allocate enough space to hold information from the longest row.
1859     */
1860     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1861     PetscInt   max = 1,tmp;
1862     for (i=0; i<matin->rmap->n; i++) {
1863       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1864       if (max < tmp) max = tmp;
1865     }
1866     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1867   }
1868 
1869   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1870   lrow = row - rstart;
1871 
1872   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1873   if (!v)   {pvA = NULL; pvB = NULL;}
1874   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1875   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1876   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1877   nztot = nzA + nzB;
1878 
1879   cmap = mat->garray;
1880   if (v  || idx) {
1881     if (nztot) {
1882       /* Sort by increasing column numbers, assuming A and B already sorted */
1883       PetscInt imark = -1;
1884       if (v) {
1885         *v = v_p = mat->rowvalues;
1886         for (i=0; i<nzB; i++) {
1887           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1888           else break;
1889         }
1890         imark = i;
1891         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1892         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1893       }
1894       if (idx) {
1895         *idx = idx_p = mat->rowindices;
1896         if (imark > -1) {
1897           for (i=0; i<imark; i++) {
1898             idx_p[i] = cmap[cworkB[i]];
1899           }
1900         } else {
1901           for (i=0; i<nzB; i++) {
1902             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1903             else break;
1904           }
1905           imark = i;
1906         }
1907         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1908         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1909       }
1910     } else {
1911       if (idx) *idx = NULL;
1912       if (v)   *v   = NULL;
1913     }
1914   }
1915   *nz  = nztot;
1916   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1917   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1922 {
1923   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1924 
1925   PetscFunctionBegin;
1926   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1927   aij->getrowactive = PETSC_FALSE;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1932 {
1933   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1934   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1935   PetscErrorCode ierr;
1936   PetscInt       i,j,cstart = mat->cmap->rstart;
1937   PetscReal      sum = 0.0;
1938   MatScalar      *v;
1939 
1940   PetscFunctionBegin;
1941   if (aij->size == 1) {
1942     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1943   } else {
1944     if (type == NORM_FROBENIUS) {
1945       v = amat->a;
1946       for (i=0; i<amat->nz; i++) {
1947         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1948       }
1949       v = bmat->a;
1950       for (i=0; i<bmat->nz; i++) {
1951         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1952       }
1953       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1954       *norm = PetscSqrtReal(*norm);
1955       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1960       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1961       *norm = 0.0;
1962       v     = amat->a; jj = amat->j;
1963       for (j=0; j<amat->nz; j++) {
1964         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1965       }
1966       v = bmat->a; jj = bmat->j;
1967       for (j=0; j<bmat->nz; j++) {
1968         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1969       }
1970       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1971       for (j=0; j<mat->cmap->N; j++) {
1972         if (tmp2[j] > *norm) *norm = tmp2[j];
1973       }
1974       ierr = PetscFree(tmp);CHKERRQ(ierr);
1975       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1976       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v   = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1992       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1993     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1999 {
2000   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2001   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2002   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2003   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2004   PetscErrorCode  ierr;
2005   Mat             B,A_diag,*B_diag;
2006   const MatScalar *array;
2007 
2008   PetscFunctionBegin;
2009   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2010   ai = Aloc->i; aj = Aloc->j;
2011   bi = Bloc->i; bj = Bloc->j;
2012   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2013     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2014     PetscSFNode          *oloc;
2015     PETSC_UNUSED PetscSF sf;
2016 
2017     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2018     /* compute d_nnz for preallocation */
2019     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2020     for (i=0; i<ai[ma]; i++) {
2021       d_nnz[aj[i]]++;
2022     }
2023     /* compute local off-diagonal contributions */
2024     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2025     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2026     /* map those to global */
2027     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2028     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2029     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2030     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2031     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2032     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2033     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2034 
2035     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2036     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2037     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2038     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2039     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2040     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2041   } else {
2042     B    = *matout;
2043     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2044   }
2045 
2046   b           = (Mat_MPIAIJ*)B->data;
2047   A_diag      = a->A;
2048   B_diag      = &b->A;
2049   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2050   A_diag_ncol = A_diag->cmap->N;
2051   B_diag_ilen = sub_B_diag->ilen;
2052   B_diag_i    = sub_B_diag->i;
2053 
2054   /* Set ilen for diagonal of B */
2055   for (i=0; i<A_diag_ncol; i++) {
2056     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2057   }
2058 
2059   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2060   very quickly (=without using MatSetValues), because all writes are local. */
2061   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2062 
2063   /* copy over the B part */
2064   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2065   array = Bloc->a;
2066   row   = A->rmap->rstart;
2067   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2068   cols_tmp = cols;
2069   for (i=0; i<mb; i++) {
2070     ncol = bi[i+1]-bi[i];
2071     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2072     row++;
2073     array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2088 {
2089   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2090   Mat            a    = aij->A,b = aij->B;
2091   PetscErrorCode ierr;
2092   PetscInt       s1,s2,s3;
2093 
2094   PetscFunctionBegin;
2095   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2096   if (rr) {
2097     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2098     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2099     /* Overlap communication with computation. */
2100     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101   }
2102   if (ll) {
2103     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2104     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2105     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2106   }
2107   /* scale  the diagonal block */
2108   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2109 
2110   if (rr) {
2111     /* Do a scatter end and then right scale the off-diagonal block */
2112     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2119 {
2120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2121   PetscErrorCode ierr;
2122 
2123   PetscFunctionBegin;
2124   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2129 {
2130   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2131   Mat            a,b,c,d;
2132   PetscBool      flg;
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   a = matA->A; b = matA->B;
2137   c = matB->A; d = matB->B;
2138 
2139   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2142   }
2143   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2148 {
2149   PetscErrorCode ierr;
2150   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2151   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2152 
2153   PetscFunctionBegin;
2154   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2155   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2156     /* because of the column compression in the off-processor part of the matrix a->B,
2157        the number of columns in a->B and b->B may be different, hence we cannot call
2158        the MatCopy() directly on the two parts. If need be, we can provide a more
2159        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2160        then copying the submatrices */
2161     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2162   } else {
2163     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2164     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2165   }
2166   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2171 {
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /*
2180    Computes the number of nonzeros per row needed for preallocation when X and Y
2181    have different nonzero structure.
2182 */
2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2184 {
2185   PetscInt       i,j,k,nzx,nzy;
2186 
2187   PetscFunctionBegin;
2188   /* Set the number of nonzeros in the new matrix */
2189   for (i=0; i<m; i++) {
2190     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2191     nzx = xi[i+1] - xi[i];
2192     nzy = yi[i+1] - yi[i];
2193     nnz[i] = 0;
2194     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2195       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2196       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2197       nnz[i]++;
2198     }
2199     for (; k<nzy; k++) nnz[i]++;
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       m = Y->rmap->N;
2209   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2210   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2218 {
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2221   PetscBLASInt   bnz,one=1;
2222   Mat_SeqAIJ     *x,*y;
2223 
2224   PetscFunctionBegin;
2225   if (str == SAME_NONZERO_PATTERN) {
2226     PetscScalar alpha = a;
2227     x    = (Mat_SeqAIJ*)xx->A->data;
2228     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2229     y    = (Mat_SeqAIJ*)yy->A->data;
2230     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2231     x    = (Mat_SeqAIJ*)xx->B->data;
2232     y    = (Mat_SeqAIJ*)yy->B->data;
2233     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2234     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2235     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2236     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2237        will be updated */
2238 #if defined(PETSC_HAVE_DEVICE)
2239     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2240       Y->offloadmask = PETSC_OFFLOAD_CPU;
2241     }
2242 #endif
2243   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2244     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2245   } else {
2246     Mat      B;
2247     PetscInt *nnz_d,*nnz_o;
2248     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2249     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2250     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2251     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2252     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2253     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2254     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2255     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2256     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2257     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2258     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2259     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2260     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2261   }
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2266 
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2283 {
2284   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2285   PetscErrorCode ierr;
2286 
2287   PetscFunctionBegin;
2288   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2289   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2305 {
2306   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode    ierr;
2308   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2309   PetscScalar       *va,*vv;
2310   Vec               vB,vA;
2311   const PetscScalar *vb;
2312 
2313   PetscFunctionBegin;
2314   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2315   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2316 
2317   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2318   if (idx) {
2319     for (i=0; i<m; i++) {
2320       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2321     }
2322   }
2323 
2324   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2325   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2326   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2327 
2328   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2329   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2330   for (i=0; i<m; i++) {
2331     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2332       vv[i] = vb[i];
2333       if (idx) idx[i] = a->garray[idxb[i]];
2334     } else {
2335       vv[i] = va[i];
2336       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2337         idx[i] = a->garray[idxb[i]];
2338     }
2339   }
2340   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2341   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2342   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2343   ierr = PetscFree(idxb);CHKERRQ(ierr);
2344   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2345   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2350 {
2351   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2352   PetscInt       m = A->rmap->n,n = A->cmap->n;
2353   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2354   PetscInt       *cmap  = mat->garray;
2355   PetscInt       *diagIdx, *offdiagIdx;
2356   Vec            diagV, offdiagV;
2357   PetscScalar    *a, *diagA, *offdiagA, *ba;
2358   PetscInt       r,j,col,ncols,*bi,*bj;
2359   PetscErrorCode ierr;
2360   Mat            B = mat->B;
2361   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2362 
2363   PetscFunctionBegin;
2364   /* When a process holds entire A and other processes have no entry */
2365   if (A->cmap->N == n) {
2366     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2367     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2368     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2369     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2370     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2371     PetscFunctionReturn(0);
2372   } else if (n == 0) {
2373     if (m) {
2374       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2375       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2376       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2377     }
2378     PetscFunctionReturn(0);
2379   }
2380 
2381   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2382   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2383   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2384   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2385 
2386   /* Get offdiagIdx[] for implicit 0.0 */
2387   ba = b->a;
2388   bi = b->i;
2389   bj = b->j;
2390   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2391   for (r = 0; r < m; r++) {
2392     ncols = bi[r+1] - bi[r];
2393     if (ncols == A->cmap->N - n) { /* Brow is dense */
2394       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2395     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2396       offdiagA[r] = 0.0;
2397 
2398       /* Find first hole in the cmap */
2399       for (j=0; j<ncols; j++) {
2400         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2401         if (col > j && j < cstart) {
2402           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2403           break;
2404         } else if (col > j + n && j >= cstart) {
2405           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2406           break;
2407         }
2408       }
2409       if (j == ncols && ncols < A->cmap->N - n) {
2410         /* a hole is outside compressed Bcols */
2411         if (ncols == 0) {
2412           if (cstart) {
2413             offdiagIdx[r] = 0;
2414           } else offdiagIdx[r] = cend;
2415         } else { /* ncols > 0 */
2416           offdiagIdx[r] = cmap[ncols-1] + 1;
2417           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2418         }
2419       }
2420     }
2421 
2422     for (j=0; j<ncols; j++) {
2423       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2424       ba++; bj++;
2425     }
2426   }
2427 
2428   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2429   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2430   for (r = 0; r < m; ++r) {
2431     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2432       a[r]   = diagA[r];
2433       if (idx) idx[r] = cstart + diagIdx[r];
2434     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2435       a[r] = diagA[r];
2436       if (idx) {
2437         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2438           idx[r] = cstart + diagIdx[r];
2439         } else idx[r] = offdiagIdx[r];
2440       }
2441     } else {
2442       a[r]   = offdiagA[r];
2443       if (idx) idx[r] = offdiagIdx[r];
2444     }
2445   }
2446   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2447   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2448   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2449   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2450   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2451   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2456 {
2457   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2458   PetscInt       m = A->rmap->n,n = A->cmap->n;
2459   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2460   PetscInt       *cmap  = mat->garray;
2461   PetscInt       *diagIdx, *offdiagIdx;
2462   Vec            diagV, offdiagV;
2463   PetscScalar    *a, *diagA, *offdiagA, *ba;
2464   PetscInt       r,j,col,ncols,*bi,*bj;
2465   PetscErrorCode ierr;
2466   Mat            B = mat->B;
2467   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2468 
2469   PetscFunctionBegin;
2470   /* When a process holds entire A and other processes have no entry */
2471   if (A->cmap->N == n) {
2472     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2473     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2474     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2475     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2476     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2477     PetscFunctionReturn(0);
2478   } else if (n == 0) {
2479     if (m) {
2480       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2481       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2482       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2483     }
2484     PetscFunctionReturn(0);
2485   }
2486 
2487   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2489   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2490   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2491 
2492   /* Get offdiagIdx[] for implicit 0.0 */
2493   ba = b->a;
2494   bi = b->i;
2495   bj = b->j;
2496   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2497   for (r = 0; r < m; r++) {
2498     ncols = bi[r+1] - bi[r];
2499     if (ncols == A->cmap->N - n) { /* Brow is dense */
2500       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j=0; j<ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols-1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j=0; j<ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2530       ba++; bj++;
2531     }
2532   }
2533 
2534   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2535   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2536   for (r = 0; r < m; ++r) {
2537     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2538       a[r]   = diagA[r];
2539       if (idx) idx[r] = cstart + diagIdx[r];
2540     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2541       a[r] = diagA[r];
2542       if (idx) {
2543         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2544           idx[r] = cstart + diagIdx[r];
2545         } else idx[r] = offdiagIdx[r];
2546       }
2547     } else {
2548       a[r]   = offdiagA[r];
2549       if (idx) idx[r] = offdiagIdx[r];
2550     }
2551   }
2552   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2553   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2554   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2555   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2556   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2557   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2562 {
2563   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2564   PetscInt       m = A->rmap->n,n = A->cmap->n;
2565   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2566   PetscInt       *cmap  = mat->garray;
2567   PetscInt       *diagIdx, *offdiagIdx;
2568   Vec            diagV, offdiagV;
2569   PetscScalar    *a, *diagA, *offdiagA, *ba;
2570   PetscInt       r,j,col,ncols,*bi,*bj;
2571   PetscErrorCode ierr;
2572   Mat            B = mat->B;
2573   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2574 
2575   PetscFunctionBegin;
2576   /* When a process holds entire A and other processes have no entry */
2577   if (A->cmap->N == n) {
2578     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2579     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2580     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2581     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2582     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2583     PetscFunctionReturn(0);
2584   } else if (n == 0) {
2585     if (m) {
2586       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2587       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2588       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2589     }
2590     PetscFunctionReturn(0);
2591   }
2592 
2593   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2594   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2595   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2596   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2597 
2598   /* Get offdiagIdx[] for implicit 0.0 */
2599   ba = b->a;
2600   bi = b->i;
2601   bj = b->j;
2602   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2603   for (r = 0; r < m; r++) {
2604     ncols = bi[r+1] - bi[r];
2605     if (ncols == A->cmap->N - n) { /* Brow is dense */
2606       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2607     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2608       offdiagA[r] = 0.0;
2609 
2610       /* Find first hole in the cmap */
2611       for (j=0; j<ncols; j++) {
2612         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2613         if (col > j && j < cstart) {
2614           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2615           break;
2616         } else if (col > j + n && j >= cstart) {
2617           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2618           break;
2619         }
2620       }
2621       if (j == ncols && ncols < A->cmap->N - n) {
2622         /* a hole is outside compressed Bcols */
2623         if (ncols == 0) {
2624           if (cstart) {
2625             offdiagIdx[r] = 0;
2626           } else offdiagIdx[r] = cend;
2627         } else { /* ncols > 0 */
2628           offdiagIdx[r] = cmap[ncols-1] + 1;
2629           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2630         }
2631       }
2632     }
2633 
2634     for (j=0; j<ncols; j++) {
2635       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2636       ba++; bj++;
2637     }
2638   }
2639 
2640   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2641   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2642   for (r = 0; r < m; ++r) {
2643     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2644       a[r] = diagA[r];
2645       if (idx) idx[r] = cstart + diagIdx[r];
2646     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2647       a[r] = diagA[r];
2648       if (idx) {
2649         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2650           idx[r] = cstart + diagIdx[r];
2651         } else idx[r] = offdiagIdx[r];
2652       }
2653     } else {
2654       a[r] = offdiagA[r];
2655       if (idx) idx[r] = offdiagIdx[r];
2656     }
2657   }
2658   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2659   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2660   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2661   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2662   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2663   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2668 {
2669   PetscErrorCode ierr;
2670   Mat            *dummy;
2671 
2672   PetscFunctionBegin;
2673   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2674   *newmat = *dummy;
2675   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2680 {
2681   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2682   PetscErrorCode ierr;
2683 
2684   PetscFunctionBegin;
2685   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2686   A->factorerrortype = a->A->factorerrortype;
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2691 {
2692   PetscErrorCode ierr;
2693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2694 
2695   PetscFunctionBegin;
2696   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2697   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2698   if (x->assembled) {
2699     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2700   } else {
2701     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2702   }
2703   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2704   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2709 {
2710   PetscFunctionBegin;
2711   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2712   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 /*@
2717    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2718 
2719    Collective on Mat
2720 
2721    Input Parameters:
2722 +    A - the matrix
2723 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2724 
2725  Level: advanced
2726 
2727 @*/
2728 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2729 {
2730   PetscErrorCode       ierr;
2731 
2732   PetscFunctionBegin;
2733   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2738 {
2739   PetscErrorCode       ierr;
2740   PetscBool            sc = PETSC_FALSE,flg;
2741 
2742   PetscFunctionBegin;
2743   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2744   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2745   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2746   if (flg) {
2747     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2748   }
2749   ierr = PetscOptionsTail();CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2754 {
2755   PetscErrorCode ierr;
2756   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2757   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2758 
2759   PetscFunctionBegin;
2760   if (!Y->preallocated) {
2761     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2762   } else if (!aij->nz) {
2763     PetscInt nonew = aij->nonew;
2764     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2765     aij->nonew = nonew;
2766   }
2767   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2772 {
2773   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2774   PetscErrorCode ierr;
2775 
2776   PetscFunctionBegin;
2777   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2778   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2779   if (d) {
2780     PetscInt rstart;
2781     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2782     *d += rstart;
2783 
2784   }
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2789 {
2790   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2791   PetscErrorCode ierr;
2792 
2793   PetscFunctionBegin;
2794   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 /* -------------------------------------------------------------------*/
2799 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2800                                        MatGetRow_MPIAIJ,
2801                                        MatRestoreRow_MPIAIJ,
2802                                        MatMult_MPIAIJ,
2803                                 /* 4*/ MatMultAdd_MPIAIJ,
2804                                        MatMultTranspose_MPIAIJ,
2805                                        MatMultTransposeAdd_MPIAIJ,
2806                                        NULL,
2807                                        NULL,
2808                                        NULL,
2809                                 /*10*/ NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        MatSOR_MPIAIJ,
2813                                        MatTranspose_MPIAIJ,
2814                                 /*15*/ MatGetInfo_MPIAIJ,
2815                                        MatEqual_MPIAIJ,
2816                                        MatGetDiagonal_MPIAIJ,
2817                                        MatDiagonalScale_MPIAIJ,
2818                                        MatNorm_MPIAIJ,
2819                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2820                                        MatAssemblyEnd_MPIAIJ,
2821                                        MatSetOption_MPIAIJ,
2822                                        MatZeroEntries_MPIAIJ,
2823                                 /*24*/ MatZeroRows_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        NULL,
2827                                        NULL,
2828                                 /*29*/ MatSetUp_MPIAIJ,
2829                                        NULL,
2830                                        NULL,
2831                                        MatGetDiagonalBlock_MPIAIJ,
2832                                        NULL,
2833                                 /*34*/ MatDuplicate_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        NULL,
2837                                        NULL,
2838                                 /*39*/ MatAXPY_MPIAIJ,
2839                                        MatCreateSubMatrices_MPIAIJ,
2840                                        MatIncreaseOverlap_MPIAIJ,
2841                                        MatGetValues_MPIAIJ,
2842                                        MatCopy_MPIAIJ,
2843                                 /*44*/ MatGetRowMax_MPIAIJ,
2844                                        MatScale_MPIAIJ,
2845                                        MatShift_MPIAIJ,
2846                                        MatDiagonalSet_MPIAIJ,
2847                                        MatZeroRowsColumns_MPIAIJ,
2848                                 /*49*/ MatSetRandom_MPIAIJ,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        NULL,
2853                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2854                                        NULL,
2855                                        MatSetUnfactored_MPIAIJ,
2856                                        MatPermute_MPIAIJ,
2857                                        NULL,
2858                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2859                                        MatDestroy_MPIAIJ,
2860                                        MatView_MPIAIJ,
2861                                        NULL,
2862                                        NULL,
2863                                 /*64*/ NULL,
2864                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2865                                        NULL,
2866                                        NULL,
2867                                        NULL,
2868                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2869                                        MatGetRowMinAbs_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                        NULL,
2873                                        NULL,
2874                                 /*75*/ MatFDColoringApply_AIJ,
2875                                        MatSetFromOptions_MPIAIJ,
2876                                        NULL,
2877                                        NULL,
2878                                        MatFindZeroDiagonals_MPIAIJ,
2879                                 /*80*/ NULL,
2880                                        NULL,
2881                                        NULL,
2882                                 /*83*/ MatLoad_MPIAIJ,
2883                                        MatIsSymmetric_MPIAIJ,
2884                                        NULL,
2885                                        NULL,
2886                                        NULL,
2887                                        NULL,
2888                                 /*89*/ NULL,
2889                                        NULL,
2890                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2891                                        NULL,
2892                                        NULL,
2893                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2894                                        NULL,
2895                                        NULL,
2896                                        NULL,
2897                                        MatBindToCPU_MPIAIJ,
2898                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2899                                        NULL,
2900                                        NULL,
2901                                        MatConjugate_MPIAIJ,
2902                                        NULL,
2903                                 /*104*/MatSetValuesRow_MPIAIJ,
2904                                        MatRealPart_MPIAIJ,
2905                                        MatImaginaryPart_MPIAIJ,
2906                                        NULL,
2907                                        NULL,
2908                                 /*109*/NULL,
2909                                        NULL,
2910                                        MatGetRowMin_MPIAIJ,
2911                                        NULL,
2912                                        MatMissingDiagonal_MPIAIJ,
2913                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2914                                        NULL,
2915                                        MatGetGhosts_MPIAIJ,
2916                                        NULL,
2917                                        NULL,
2918                                 /*119*/NULL,
2919                                        NULL,
2920                                        NULL,
2921                                        NULL,
2922                                        MatGetMultiProcBlock_MPIAIJ,
2923                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2924                                        MatGetColumnNorms_MPIAIJ,
2925                                        MatInvertBlockDiagonal_MPIAIJ,
2926                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2927                                        MatCreateSubMatricesMPI_MPIAIJ,
2928                                 /*129*/NULL,
2929                                        NULL,
2930                                        NULL,
2931                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2932                                        NULL,
2933                                 /*134*/NULL,
2934                                        NULL,
2935                                        NULL,
2936                                        NULL,
2937                                        NULL,
2938                                 /*139*/MatSetBlockSizes_MPIAIJ,
2939                                        NULL,
2940                                        NULL,
2941                                        MatFDColoringSetUp_MPIXAIJ,
2942                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2943                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2944                                 /*145*/NULL,
2945                                        NULL,
2946                                        NULL
2947 };
2948 
2949 /* ----------------------------------------------------------------------------------------*/
2950 
2951 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2952 {
2953   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2954   PetscErrorCode ierr;
2955 
2956   PetscFunctionBegin;
2957   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2958   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2963 {
2964   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2969   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2974 {
2975   Mat_MPIAIJ     *b;
2976   PetscErrorCode ierr;
2977   PetscMPIInt    size;
2978 
2979   PetscFunctionBegin;
2980   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2981   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2982   b = (Mat_MPIAIJ*)B->data;
2983 
2984 #if defined(PETSC_USE_CTABLE)
2985   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2986 #else
2987   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2988 #endif
2989   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2990   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2991   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2992 
2993   /* Because the B will have been resized we simply destroy it and create a new one each time */
2994   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2995   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2996   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2997   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2998   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2999   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3000   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3001 
3002   if (!B->preallocated) {
3003     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3004     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3005     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3006     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3007     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3008   }
3009 
3010   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3011   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3012   B->preallocated  = PETSC_TRUE;
3013   B->was_assembled = PETSC_FALSE;
3014   B->assembled     = PETSC_FALSE;
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3019 {
3020   Mat_MPIAIJ     *b;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3025   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3027   b = (Mat_MPIAIJ*)B->data;
3028 
3029 #if defined(PETSC_USE_CTABLE)
3030   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3031 #else
3032   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3033 #endif
3034   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3035   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3036   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3037 
3038   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3039   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3040   B->preallocated  = PETSC_TRUE;
3041   B->was_assembled = PETSC_FALSE;
3042   B->assembled = PETSC_FALSE;
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3047 {
3048   Mat            mat;
3049   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3050   PetscErrorCode ierr;
3051 
3052   PetscFunctionBegin;
3053   *newmat = NULL;
3054   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3055   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3056   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3057   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3058   a       = (Mat_MPIAIJ*)mat->data;
3059 
3060   mat->factortype   = matin->factortype;
3061   mat->assembled    = matin->assembled;
3062   mat->insertmode   = NOT_SET_VALUES;
3063   mat->preallocated = matin->preallocated;
3064 
3065   a->size         = oldmat->size;
3066   a->rank         = oldmat->rank;
3067   a->donotstash   = oldmat->donotstash;
3068   a->roworiented  = oldmat->roworiented;
3069   a->rowindices   = NULL;
3070   a->rowvalues    = NULL;
3071   a->getrowactive = PETSC_FALSE;
3072 
3073   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3074   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3075 
3076   if (oldmat->colmap) {
3077 #if defined(PETSC_USE_CTABLE)
3078     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3079 #else
3080     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3081     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3082     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3083 #endif
3084   } else a->colmap = NULL;
3085   if (oldmat->garray) {
3086     PetscInt len;
3087     len  = oldmat->B->cmap->n;
3088     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3089     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3090     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3091   } else a->garray = NULL;
3092 
3093   /* It may happen MatDuplicate is called with a non-assembled matrix
3094      In fact, MatDuplicate only requires the matrix to be preallocated
3095      This may happen inside a DMCreateMatrix_Shell */
3096   if (oldmat->lvec) {
3097     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3098     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3099   }
3100   if (oldmat->Mvctx) {
3101     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3102     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3103   }
3104   if (oldmat->Mvctx_mpi1) {
3105     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3106     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3107   }
3108 
3109   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3110   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3111   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3112   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3113   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3114   *newmat = mat;
3115   PetscFunctionReturn(0);
3116 }
3117 
3118 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3119 {
3120   PetscBool      isbinary, ishdf5;
3121   PetscErrorCode ierr;
3122 
3123   PetscFunctionBegin;
3124   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3125   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3126   /* force binary viewer to load .info file if it has not yet done so */
3127   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3128   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3129   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3130   if (isbinary) {
3131     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3132   } else if (ishdf5) {
3133 #if defined(PETSC_HAVE_HDF5)
3134     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3135 #else
3136     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3137 #endif
3138   } else {
3139     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3140   }
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3145 {
3146   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3147   PetscInt       *rowidxs,*colidxs;
3148   PetscScalar    *matvals;
3149   PetscErrorCode ierr;
3150 
3151   PetscFunctionBegin;
3152   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3153 
3154   /* read in matrix header */
3155   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3156   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3157   M  = header[1]; N = header[2]; nz = header[3];
3158   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3159   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3160   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3161 
3162   /* set block sizes from the viewer's .info file */
3163   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3164   /* set global sizes if not set already */
3165   if (mat->rmap->N < 0) mat->rmap->N = M;
3166   if (mat->cmap->N < 0) mat->cmap->N = N;
3167   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3168   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3169 
3170   /* check if the matrix sizes are correct */
3171   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3172   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3173 
3174   /* read in row lengths and build row indices */
3175   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3176   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3177   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3178   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3179   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3180   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3181   /* read in column indices and matrix values */
3182   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3183   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3184   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3185   /* store matrix indices and values */
3186   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3187   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3188   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 /* Not scalable because of ISAllGather() unless getting all columns. */
3193 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3194 {
3195   PetscErrorCode ierr;
3196   IS             iscol_local;
3197   PetscBool      isstride;
3198   PetscMPIInt    lisstride=0,gisstride;
3199 
3200   PetscFunctionBegin;
3201   /* check if we are grabbing all columns*/
3202   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3203 
3204   if (isstride) {
3205     PetscInt  start,len,mstart,mlen;
3206     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3207     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3208     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3209     if (mstart == start && mlen-mstart == len) lisstride = 1;
3210   }
3211 
3212   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3213   if (gisstride) {
3214     PetscInt N;
3215     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3216     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3217     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3218     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3219   } else {
3220     PetscInt cbs;
3221     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3222     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3223     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3224   }
3225 
3226   *isseq = iscol_local;
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 /*
3231  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3232  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3233 
3234  Input Parameters:
3235    mat - matrix
3236    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3237            i.e., mat->rstart <= isrow[i] < mat->rend
3238    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3239            i.e., mat->cstart <= iscol[i] < mat->cend
3240  Output Parameter:
3241    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3242    iscol_o - sequential column index set for retrieving mat->B
3243    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3244  */
3245 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3246 {
3247   PetscErrorCode ierr;
3248   Vec            x,cmap;
3249   const PetscInt *is_idx;
3250   PetscScalar    *xarray,*cmaparray;
3251   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3252   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3253   Mat            B=a->B;
3254   Vec            lvec=a->lvec,lcmap;
3255   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3256   MPI_Comm       comm;
3257   VecScatter     Mvctx=a->Mvctx;
3258 
3259   PetscFunctionBegin;
3260   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3261   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3262 
3263   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3264   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3265   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3266   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3267   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3268 
3269   /* Get start indices */
3270   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3271   isstart -= ncols;
3272   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3273 
3274   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3275   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3276   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3277   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3278   for (i=0; i<ncols; i++) {
3279     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3280     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3281     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3282   }
3283   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3284   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3285   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3286 
3287   /* Get iscol_d */
3288   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3289   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3290   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3291 
3292   /* Get isrow_d */
3293   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3294   rstart = mat->rmap->rstart;
3295   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3296   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3297   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3298   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3299 
3300   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3301   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3302   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3303 
3304   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3305   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3306   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3307 
3308   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3309 
3310   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3311   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3312 
3313   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3314   /* off-process column indices */
3315   count = 0;
3316   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3317   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3318 
3319   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3320   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3321   for (i=0; i<Bn; i++) {
3322     if (PetscRealPart(xarray[i]) > -1.0) {
3323       idx[count]     = i;                   /* local column index in off-diagonal part B */
3324       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3325       count++;
3326     }
3327   }
3328   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3329   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3330 
3331   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3332   /* cannot ensure iscol_o has same blocksize as iscol! */
3333 
3334   ierr = PetscFree(idx);CHKERRQ(ierr);
3335   *garray = cmap1;
3336 
3337   ierr = VecDestroy(&x);CHKERRQ(ierr);
3338   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3339   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3340   PetscFunctionReturn(0);
3341 }
3342 
3343 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3344 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3345 {
3346   PetscErrorCode ierr;
3347   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3348   Mat            M = NULL;
3349   MPI_Comm       comm;
3350   IS             iscol_d,isrow_d,iscol_o;
3351   Mat            Asub = NULL,Bsub = NULL;
3352   PetscInt       n;
3353 
3354   PetscFunctionBegin;
3355   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3356 
3357   if (call == MAT_REUSE_MATRIX) {
3358     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3359     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3360     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3361 
3362     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3363     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3364 
3365     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3366     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3367 
3368     /* Update diagonal and off-diagonal portions of submat */
3369     asub = (Mat_MPIAIJ*)(*submat)->data;
3370     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3371     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3372     if (n) {
3373       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3374     }
3375     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3376     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3377 
3378   } else { /* call == MAT_INITIAL_MATRIX) */
3379     const PetscInt *garray;
3380     PetscInt        BsubN;
3381 
3382     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3383     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3384 
3385     /* Create local submatrices Asub and Bsub */
3386     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3387     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3388 
3389     /* Create submatrix M */
3390     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3391 
3392     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3393     asub = (Mat_MPIAIJ*)M->data;
3394 
3395     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3396     n = asub->B->cmap->N;
3397     if (BsubN > n) {
3398       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3399       const PetscInt *idx;
3400       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3401       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3402 
3403       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3404       j = 0;
3405       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3406       for (i=0; i<n; i++) {
3407         if (j >= BsubN) break;
3408         while (subgarray[i] > garray[j]) j++;
3409 
3410         if (subgarray[i] == garray[j]) {
3411           idx_new[i] = idx[j++];
3412         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3413       }
3414       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3415 
3416       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3417       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3418 
3419     } else if (BsubN < n) {
3420       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3421     }
3422 
3423     ierr = PetscFree(garray);CHKERRQ(ierr);
3424     *submat = M;
3425 
3426     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3427     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3428     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3429 
3430     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3431     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3432 
3433     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3434     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3435   }
3436   PetscFunctionReturn(0);
3437 }
3438 
3439 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3440 {
3441   PetscErrorCode ierr;
3442   IS             iscol_local=NULL,isrow_d;
3443   PetscInt       csize;
3444   PetscInt       n,i,j,start,end;
3445   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3446   MPI_Comm       comm;
3447 
3448   PetscFunctionBegin;
3449   /* If isrow has same processor distribution as mat,
3450      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3451   if (call == MAT_REUSE_MATRIX) {
3452     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3453     if (isrow_d) {
3454       sameRowDist  = PETSC_TRUE;
3455       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3456     } else {
3457       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3458       if (iscol_local) {
3459         sameRowDist  = PETSC_TRUE;
3460         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3461       }
3462     }
3463   } else {
3464     /* Check if isrow has same processor distribution as mat */
3465     sameDist[0] = PETSC_FALSE;
3466     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3467     if (!n) {
3468       sameDist[0] = PETSC_TRUE;
3469     } else {
3470       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3471       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3472       if (i >= start && j < end) {
3473         sameDist[0] = PETSC_TRUE;
3474       }
3475     }
3476 
3477     /* Check if iscol has same processor distribution as mat */
3478     sameDist[1] = PETSC_FALSE;
3479     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3480     if (!n) {
3481       sameDist[1] = PETSC_TRUE;
3482     } else {
3483       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3484       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3485       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3486     }
3487 
3488     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3489     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3490     sameRowDist = tsameDist[0];
3491   }
3492 
3493   if (sameRowDist) {
3494     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3495       /* isrow and iscol have same processor distribution as mat */
3496       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3497       PetscFunctionReturn(0);
3498     } else { /* sameRowDist */
3499       /* isrow has same processor distribution as mat */
3500       if (call == MAT_INITIAL_MATRIX) {
3501         PetscBool sorted;
3502         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3503         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3504         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3505         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3506 
3507         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3508         if (sorted) {
3509           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3510           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3511           PetscFunctionReturn(0);
3512         }
3513       } else { /* call == MAT_REUSE_MATRIX */
3514         IS    iscol_sub;
3515         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3516         if (iscol_sub) {
3517           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3518           PetscFunctionReturn(0);
3519         }
3520       }
3521     }
3522   }
3523 
3524   /* General case: iscol -> iscol_local which has global size of iscol */
3525   if (call == MAT_REUSE_MATRIX) {
3526     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3527     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3528   } else {
3529     if (!iscol_local) {
3530       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3531     }
3532   }
3533 
3534   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3535   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3536 
3537   if (call == MAT_INITIAL_MATRIX) {
3538     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3539     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3540   }
3541   PetscFunctionReturn(0);
3542 }
3543 
3544 /*@C
3545      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3546          and "off-diagonal" part of the matrix in CSR format.
3547 
3548    Collective
3549 
3550    Input Parameters:
3551 +  comm - MPI communicator
3552 .  A - "diagonal" portion of matrix
3553 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3554 -  garray - global index of B columns
3555 
3556    Output Parameter:
3557 .   mat - the matrix, with input A as its local diagonal matrix
3558    Level: advanced
3559 
3560    Notes:
3561        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3562        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3563 
3564 .seealso: MatCreateMPIAIJWithSplitArrays()
3565 @*/
3566 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3567 {
3568   PetscErrorCode ierr;
3569   Mat_MPIAIJ     *maij;
3570   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3571   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3572   PetscScalar    *oa=b->a;
3573   Mat            Bnew;
3574   PetscInt       m,n,N;
3575 
3576   PetscFunctionBegin;
3577   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3578   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3579   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3580   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3581   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3582   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3583 
3584   /* Get global columns of mat */
3585   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3586 
3587   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3588   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3589   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3590   maij = (Mat_MPIAIJ*)(*mat)->data;
3591 
3592   (*mat)->preallocated = PETSC_TRUE;
3593 
3594   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3595   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3596 
3597   /* Set A as diagonal portion of *mat */
3598   maij->A = A;
3599 
3600   nz = oi[m];
3601   for (i=0; i<nz; i++) {
3602     col   = oj[i];
3603     oj[i] = garray[col];
3604   }
3605 
3606    /* Set Bnew as off-diagonal portion of *mat */
3607   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3608   bnew        = (Mat_SeqAIJ*)Bnew->data;
3609   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3610   maij->B     = Bnew;
3611 
3612   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3613 
3614   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3615   b->free_a       = PETSC_FALSE;
3616   b->free_ij      = PETSC_FALSE;
3617   ierr = MatDestroy(&B);CHKERRQ(ierr);
3618 
3619   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3620   bnew->free_a       = PETSC_TRUE;
3621   bnew->free_ij      = PETSC_TRUE;
3622 
3623   /* condense columns of maij->B */
3624   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3625   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3626   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3627   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3628   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3633 
3634 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3635 {
3636   PetscErrorCode ierr;
3637   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3638   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3639   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3640   Mat            M,Msub,B=a->B;
3641   MatScalar      *aa;
3642   Mat_SeqAIJ     *aij;
3643   PetscInt       *garray = a->garray,*colsub,Ncols;
3644   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3645   IS             iscol_sub,iscmap;
3646   const PetscInt *is_idx,*cmap;
3647   PetscBool      allcolumns=PETSC_FALSE;
3648   MPI_Comm       comm;
3649 
3650   PetscFunctionBegin;
3651   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3652 
3653   if (call == MAT_REUSE_MATRIX) {
3654     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3655     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3656     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3657 
3658     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3659     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3660 
3661     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3662     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3663 
3664     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3665 
3666   } else { /* call == MAT_INITIAL_MATRIX) */
3667     PetscBool flg;
3668 
3669     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3670     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3671 
3672     /* (1) iscol -> nonscalable iscol_local */
3673     /* Check for special case: each processor gets entire matrix columns */
3674     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3675     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3676     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3677     if (allcolumns) {
3678       iscol_sub = iscol_local;
3679       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3680       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3681 
3682     } else {
3683       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3684       PetscInt *idx,*cmap1,k;
3685       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3686       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3687       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3688       count = 0;
3689       k     = 0;
3690       for (i=0; i<Ncols; i++) {
3691         j = is_idx[i];
3692         if (j >= cstart && j < cend) {
3693           /* diagonal part of mat */
3694           idx[count]     = j;
3695           cmap1[count++] = i; /* column index in submat */
3696         } else if (Bn) {
3697           /* off-diagonal part of mat */
3698           if (j == garray[k]) {
3699             idx[count]     = j;
3700             cmap1[count++] = i;  /* column index in submat */
3701           } else if (j > garray[k]) {
3702             while (j > garray[k] && k < Bn-1) k++;
3703             if (j == garray[k]) {
3704               idx[count]     = j;
3705               cmap1[count++] = i; /* column index in submat */
3706             }
3707           }
3708         }
3709       }
3710       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3711 
3712       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3713       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3714       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3715 
3716       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3717     }
3718 
3719     /* (3) Create sequential Msub */
3720     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3721   }
3722 
3723   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3724   aij  = (Mat_SeqAIJ*)(Msub)->data;
3725   ii   = aij->i;
3726   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3727 
3728   /*
3729       m - number of local rows
3730       Ncols - number of columns (same on all processors)
3731       rstart - first row in new global matrix generated
3732   */
3733   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3734 
3735   if (call == MAT_INITIAL_MATRIX) {
3736     /* (4) Create parallel newmat */
3737     PetscMPIInt    rank,size;
3738     PetscInt       csize;
3739 
3740     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3741     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3742 
3743     /*
3744         Determine the number of non-zeros in the diagonal and off-diagonal
3745         portions of the matrix in order to do correct preallocation
3746     */
3747 
3748     /* first get start and end of "diagonal" columns */
3749     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3750     if (csize == PETSC_DECIDE) {
3751       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3752       if (mglobal == Ncols) { /* square matrix */
3753         nlocal = m;
3754       } else {
3755         nlocal = Ncols/size + ((Ncols % size) > rank);
3756       }
3757     } else {
3758       nlocal = csize;
3759     }
3760     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3761     rstart = rend - nlocal;
3762     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3763 
3764     /* next, compute all the lengths */
3765     jj    = aij->j;
3766     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3767     olens = dlens + m;
3768     for (i=0; i<m; i++) {
3769       jend = ii[i+1] - ii[i];
3770       olen = 0;
3771       dlen = 0;
3772       for (j=0; j<jend; j++) {
3773         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3774         else dlen++;
3775         jj++;
3776       }
3777       olens[i] = olen;
3778       dlens[i] = dlen;
3779     }
3780 
3781     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3782     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3783 
3784     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3785     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3786     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3787     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3788     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3789     ierr = PetscFree(dlens);CHKERRQ(ierr);
3790 
3791   } else { /* call == MAT_REUSE_MATRIX */
3792     M    = *newmat;
3793     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3794     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3795     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3796     /*
3797          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3798        rather than the slower MatSetValues().
3799     */
3800     M->was_assembled = PETSC_TRUE;
3801     M->assembled     = PETSC_FALSE;
3802   }
3803 
3804   /* (5) Set values of Msub to *newmat */
3805   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3806   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3807 
3808   jj   = aij->j;
3809   aa   = aij->a;
3810   for (i=0; i<m; i++) {
3811     row = rstart + i;
3812     nz  = ii[i+1] - ii[i];
3813     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3814     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3815     jj += nz; aa += nz;
3816   }
3817   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3818 
3819   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3820   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3821 
3822   ierr = PetscFree(colsub);CHKERRQ(ierr);
3823 
3824   /* save Msub, iscol_sub and iscmap used in processor for next request */
3825   if (call ==  MAT_INITIAL_MATRIX) {
3826     *newmat = M;
3827     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3828     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3829 
3830     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3831     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3832 
3833     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3834     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3835 
3836     if (iscol_local) {
3837       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3838       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3839     }
3840   }
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 /*
3845     Not great since it makes two copies of the submatrix, first an SeqAIJ
3846   in local and then by concatenating the local matrices the end result.
3847   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3848 
3849   Note: This requires a sequential iscol with all indices.
3850 */
3851 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3852 {
3853   PetscErrorCode ierr;
3854   PetscMPIInt    rank,size;
3855   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3856   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3857   Mat            M,Mreuse;
3858   MatScalar      *aa,*vwork;
3859   MPI_Comm       comm;
3860   Mat_SeqAIJ     *aij;
3861   PetscBool      colflag,allcolumns=PETSC_FALSE;
3862 
3863   PetscFunctionBegin;
3864   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3865   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3866   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3867 
3868   /* Check for special case: each processor gets entire matrix columns */
3869   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3870   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3871   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3872   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3873 
3874   if (call ==  MAT_REUSE_MATRIX) {
3875     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3876     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3877     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3878   } else {
3879     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3880   }
3881 
3882   /*
3883       m - number of local rows
3884       n - number of columns (same on all processors)
3885       rstart - first row in new global matrix generated
3886   */
3887   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3888   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3889   if (call == MAT_INITIAL_MATRIX) {
3890     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3891     ii  = aij->i;
3892     jj  = aij->j;
3893 
3894     /*
3895         Determine the number of non-zeros in the diagonal and off-diagonal
3896         portions of the matrix in order to do correct preallocation
3897     */
3898 
3899     /* first get start and end of "diagonal" columns */
3900     if (csize == PETSC_DECIDE) {
3901       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3902       if (mglobal == n) { /* square matrix */
3903         nlocal = m;
3904       } else {
3905         nlocal = n/size + ((n % size) > rank);
3906       }
3907     } else {
3908       nlocal = csize;
3909     }
3910     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3911     rstart = rend - nlocal;
3912     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3913 
3914     /* next, compute all the lengths */
3915     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3916     olens = dlens + m;
3917     for (i=0; i<m; i++) {
3918       jend = ii[i+1] - ii[i];
3919       olen = 0;
3920       dlen = 0;
3921       for (j=0; j<jend; j++) {
3922         if (*jj < rstart || *jj >= rend) olen++;
3923         else dlen++;
3924         jj++;
3925       }
3926       olens[i] = olen;
3927       dlens[i] = dlen;
3928     }
3929     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3930     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3931     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3932     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3933     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3934     ierr = PetscFree(dlens);CHKERRQ(ierr);
3935   } else {
3936     PetscInt ml,nl;
3937 
3938     M    = *newmat;
3939     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3940     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3941     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3942     /*
3943          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3944        rather than the slower MatSetValues().
3945     */
3946     M->was_assembled = PETSC_TRUE;
3947     M->assembled     = PETSC_FALSE;
3948   }
3949   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3950   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3951   ii   = aij->i;
3952   jj   = aij->j;
3953   aa   = aij->a;
3954   for (i=0; i<m; i++) {
3955     row   = rstart + i;
3956     nz    = ii[i+1] - ii[i];
3957     cwork = jj;     jj += nz;
3958     vwork = aa;     aa += nz;
3959     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3960   }
3961 
3962   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   *newmat = M;
3965 
3966   /* save submatrix used in processor for next request */
3967   if (call ==  MAT_INITIAL_MATRIX) {
3968     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3969     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3970   }
3971   PetscFunctionReturn(0);
3972 }
3973 
3974 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3975 {
3976   PetscInt       m,cstart, cend,j,nnz,i,d;
3977   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3978   const PetscInt *JJ;
3979   PetscErrorCode ierr;
3980   PetscBool      nooffprocentries;
3981 
3982   PetscFunctionBegin;
3983   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3984 
3985   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3986   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3987   m      = B->rmap->n;
3988   cstart = B->cmap->rstart;
3989   cend   = B->cmap->rend;
3990   rstart = B->rmap->rstart;
3991 
3992   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3993 
3994   if (PetscDefined(USE_DEBUG)) {
3995     for (i=0; i<m; i++) {
3996       nnz = Ii[i+1]- Ii[i];
3997       JJ  = J + Ii[i];
3998       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3999       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
4000       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4001     }
4002   }
4003 
4004   for (i=0; i<m; i++) {
4005     nnz     = Ii[i+1]- Ii[i];
4006     JJ      = J + Ii[i];
4007     nnz_max = PetscMax(nnz_max,nnz);
4008     d       = 0;
4009     for (j=0; j<nnz; j++) {
4010       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4011     }
4012     d_nnz[i] = d;
4013     o_nnz[i] = nnz - d;
4014   }
4015   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4016   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4017 
4018   for (i=0; i<m; i++) {
4019     ii   = i + rstart;
4020     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4021   }
4022   nooffprocentries    = B->nooffprocentries;
4023   B->nooffprocentries = PETSC_TRUE;
4024   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4025   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4026   B->nooffprocentries = nooffprocentries;
4027 
4028   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4029   PetscFunctionReturn(0);
4030 }
4031 
4032 /*@
4033    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4034    (the default parallel PETSc format).
4035 
4036    Collective
4037 
4038    Input Parameters:
4039 +  B - the matrix
4040 .  i - the indices into j for the start of each local row (starts with zero)
4041 .  j - the column indices for each local row (starts with zero)
4042 -  v - optional values in the matrix
4043 
4044    Level: developer
4045 
4046    Notes:
4047        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4048      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4049      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4050 
4051        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4052 
4053        The format which is used for the sparse matrix input, is equivalent to a
4054     row-major ordering.. i.e for the following matrix, the input data expected is
4055     as shown
4056 
4057 $        1 0 0
4058 $        2 0 3     P0
4059 $       -------
4060 $        4 5 6     P1
4061 $
4062 $     Process0 [P0]: rows_owned=[0,1]
4063 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4064 $        j =  {0,0,2}  [size = 3]
4065 $        v =  {1,2,3}  [size = 3]
4066 $
4067 $     Process1 [P1]: rows_owned=[2]
4068 $        i =  {0,3}    [size = nrow+1  = 1+1]
4069 $        j =  {0,1,2}  [size = 3]
4070 $        v =  {4,5,6}  [size = 3]
4071 
4072 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4073           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4074 @*/
4075 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4076 {
4077   PetscErrorCode ierr;
4078 
4079   PetscFunctionBegin;
4080   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4081   PetscFunctionReturn(0);
4082 }
4083 
4084 /*@C
4085    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4086    (the default parallel PETSc format).  For good matrix assembly performance
4087    the user should preallocate the matrix storage by setting the parameters
4088    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4089    performance can be increased by more than a factor of 50.
4090 
4091    Collective
4092 
4093    Input Parameters:
4094 +  B - the matrix
4095 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4096            (same value is used for all local rows)
4097 .  d_nnz - array containing the number of nonzeros in the various rows of the
4098            DIAGONAL portion of the local submatrix (possibly different for each row)
4099            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4100            The size of this array is equal to the number of local rows, i.e 'm'.
4101            For matrices that will be factored, you must leave room for (and set)
4102            the diagonal entry even if it is zero.
4103 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4104            submatrix (same value is used for all local rows).
4105 -  o_nnz - array containing the number of nonzeros in the various rows of the
4106            OFF-DIAGONAL portion of the local submatrix (possibly different for
4107            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4108            structure. The size of this array is equal to the number
4109            of local rows, i.e 'm'.
4110 
4111    If the *_nnz parameter is given then the *_nz parameter is ignored
4112 
4113    The AIJ format (also called the Yale sparse matrix format or
4114    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4115    storage.  The stored row and column indices begin with zero.
4116    See Users-Manual: ch_mat for details.
4117 
4118    The parallel matrix is partitioned such that the first m0 rows belong to
4119    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4120    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4121 
4122    The DIAGONAL portion of the local submatrix of a processor can be defined
4123    as the submatrix which is obtained by extraction the part corresponding to
4124    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4125    first row that belongs to the processor, r2 is the last row belonging to
4126    the this processor, and c1-c2 is range of indices of the local part of a
4127    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4128    common case of a square matrix, the row and column ranges are the same and
4129    the DIAGONAL part is also square. The remaining portion of the local
4130    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4131 
4132    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4133 
4134    You can call MatGetInfo() to get information on how effective the preallocation was;
4135    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4136    You can also run with the option -info and look for messages with the string
4137    malloc in them to see if additional memory allocation was needed.
4138 
4139    Example usage:
4140 
4141    Consider the following 8x8 matrix with 34 non-zero values, that is
4142    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4143    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4144    as follows:
4145 
4146 .vb
4147             1  2  0  |  0  3  0  |  0  4
4148     Proc0   0  5  6  |  7  0  0  |  8  0
4149             9  0 10  | 11  0  0  | 12  0
4150     -------------------------------------
4151            13  0 14  | 15 16 17  |  0  0
4152     Proc1   0 18  0  | 19 20 21  |  0  0
4153             0  0  0  | 22 23  0  | 24  0
4154     -------------------------------------
4155     Proc2  25 26 27  |  0  0 28  | 29  0
4156            30  0  0  | 31 32 33  |  0 34
4157 .ve
4158 
4159    This can be represented as a collection of submatrices as:
4160 
4161 .vb
4162       A B C
4163       D E F
4164       G H I
4165 .ve
4166 
4167    Where the submatrices A,B,C are owned by proc0, D,E,F are
4168    owned by proc1, G,H,I are owned by proc2.
4169 
4170    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4172    The 'M','N' parameters are 8,8, and have the same values on all procs.
4173 
4174    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4175    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4176    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4177    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4178    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4179    matrix, ans [DF] as another SeqAIJ matrix.
4180 
4181    When d_nz, o_nz parameters are specified, d_nz storage elements are
4182    allocated for every row of the local diagonal submatrix, and o_nz
4183    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4184    One way to choose d_nz and o_nz is to use the max nonzerors per local
4185    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4186    In this case, the values of d_nz,o_nz are:
4187 .vb
4188      proc0 : dnz = 2, o_nz = 2
4189      proc1 : dnz = 3, o_nz = 2
4190      proc2 : dnz = 1, o_nz = 4
4191 .ve
4192    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4193    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4194    for proc3. i.e we are using 12+15+10=37 storage locations to store
4195    34 values.
4196 
4197    When d_nnz, o_nnz parameters are specified, the storage is specified
4198    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4199    In the above case the values for d_nnz,o_nnz are:
4200 .vb
4201      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4202      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4203      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4204 .ve
4205    Here the space allocated is sum of all the above values i.e 34, and
4206    hence pre-allocation is perfect.
4207 
4208    Level: intermediate
4209 
4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4211           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4212 @*/
4213 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4214 {
4215   PetscErrorCode ierr;
4216 
4217   PetscFunctionBegin;
4218   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4219   PetscValidType(B,1);
4220   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4221   PetscFunctionReturn(0);
4222 }
4223 
4224 /*@
4225      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4226          CSR format for the local rows.
4227 
4228    Collective
4229 
4230    Input Parameters:
4231 +  comm - MPI communicator
4232 .  m - number of local rows (Cannot be PETSC_DECIDE)
4233 .  n - This value should be the same as the local size used in creating the
4234        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4235        calculated if N is given) For square matrices n is almost always m.
4236 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4237 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4238 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4239 .   j - column indices
4240 -   a - matrix values
4241 
4242    Output Parameter:
4243 .   mat - the matrix
4244 
4245    Level: intermediate
4246 
4247    Notes:
4248        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4249      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4250      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4251 
4252        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4253 
4254        The format which is used for the sparse matrix input, is equivalent to a
4255     row-major ordering.. i.e for the following matrix, the input data expected is
4256     as shown
4257 
4258        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4259 
4260 $        1 0 0
4261 $        2 0 3     P0
4262 $       -------
4263 $        4 5 6     P1
4264 $
4265 $     Process0 [P0]: rows_owned=[0,1]
4266 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4267 $        j =  {0,0,2}  [size = 3]
4268 $        v =  {1,2,3}  [size = 3]
4269 $
4270 $     Process1 [P1]: rows_owned=[2]
4271 $        i =  {0,3}    [size = nrow+1  = 1+1]
4272 $        j =  {0,1,2}  [size = 3]
4273 $        v =  {4,5,6}  [size = 3]
4274 
4275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4276           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4277 @*/
4278 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4279 {
4280   PetscErrorCode ierr;
4281 
4282   PetscFunctionBegin;
4283   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4284   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4285   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4286   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4287   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4288   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4289   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4290   PetscFunctionReturn(0);
4291 }
4292 
4293 /*@
4294      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4295          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4296 
4297    Collective
4298 
4299    Input Parameters:
4300 +  mat - the matrix
4301 .  m - number of local rows (Cannot be PETSC_DECIDE)
4302 .  n - This value should be the same as the local size used in creating the
4303        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4304        calculated if N is given) For square matrices n is almost always m.
4305 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4306 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4307 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4308 .  J - column indices
4309 -  v - matrix values
4310 
4311    Level: intermediate
4312 
4313 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4314           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4315 @*/
4316 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4317 {
4318   PetscErrorCode ierr;
4319   PetscInt       cstart,nnz,i,j;
4320   PetscInt       *ld;
4321   PetscBool      nooffprocentries;
4322   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4323   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4324   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4325   const PetscInt *Adi = Ad->i;
4326   PetscInt       ldi,Iii,md;
4327 
4328   PetscFunctionBegin;
4329   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4330   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4331   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4332   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4333 
4334   cstart = mat->cmap->rstart;
4335   if (!Aij->ld) {
4336     /* count number of entries below block diagonal */
4337     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4338     Aij->ld = ld;
4339     for (i=0; i<m; i++) {
4340       nnz  = Ii[i+1]- Ii[i];
4341       j     = 0;
4342       while  (J[j] < cstart && j < nnz) {j++;}
4343       J    += nnz;
4344       ld[i] = j;
4345     }
4346   } else {
4347     ld = Aij->ld;
4348   }
4349 
4350   for (i=0; i<m; i++) {
4351     nnz  = Ii[i+1]- Ii[i];
4352     Iii  = Ii[i];
4353     ldi  = ld[i];
4354     md   = Adi[i+1]-Adi[i];
4355     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4356     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4357     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4358     ad  += md;
4359     ao  += nnz - md;
4360   }
4361   nooffprocentries      = mat->nooffprocentries;
4362   mat->nooffprocentries = PETSC_TRUE;
4363   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4364   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4365   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4366   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4368   mat->nooffprocentries = nooffprocentries;
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 /*@C
4373    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4374    (the default parallel PETSc format).  For good matrix assembly performance
4375    the user should preallocate the matrix storage by setting the parameters
4376    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4377    performance can be increased by more than a factor of 50.
4378 
4379    Collective
4380 
4381    Input Parameters:
4382 +  comm - MPI communicator
4383 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4384            This value should be the same as the local size used in creating the
4385            y vector for the matrix-vector product y = Ax.
4386 .  n - This value should be the same as the local size used in creating the
4387        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4388        calculated if N is given) For square matrices n is almost always m.
4389 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4390 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4391 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4392            (same value is used for all local rows)
4393 .  d_nnz - array containing the number of nonzeros in the various rows of the
4394            DIAGONAL portion of the local submatrix (possibly different for each row)
4395            or NULL, if d_nz is used to specify the nonzero structure.
4396            The size of this array is equal to the number of local rows, i.e 'm'.
4397 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4398            submatrix (same value is used for all local rows).
4399 -  o_nnz - array containing the number of nonzeros in the various rows of the
4400            OFF-DIAGONAL portion of the local submatrix (possibly different for
4401            each row) or NULL, if o_nz is used to specify the nonzero
4402            structure. The size of this array is equal to the number
4403            of local rows, i.e 'm'.
4404 
4405    Output Parameter:
4406 .  A - the matrix
4407 
4408    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4409    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4410    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4411 
4412    Notes:
4413    If the *_nnz parameter is given then the *_nz parameter is ignored
4414 
4415    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4416    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4417    storage requirements for this matrix.
4418 
4419    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4420    processor than it must be used on all processors that share the object for
4421    that argument.
4422 
4423    The user MUST specify either the local or global matrix dimensions
4424    (possibly both).
4425 
4426    The parallel matrix is partitioned across processors such that the
4427    first m0 rows belong to process 0, the next m1 rows belong to
4428    process 1, the next m2 rows belong to process 2 etc.. where
4429    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4430    values corresponding to [m x N] submatrix.
4431 
4432    The columns are logically partitioned with the n0 columns belonging
4433    to 0th partition, the next n1 columns belonging to the next
4434    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4435 
4436    The DIAGONAL portion of the local submatrix on any given processor
4437    is the submatrix corresponding to the rows and columns m,n
4438    corresponding to the given processor. i.e diagonal matrix on
4439    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4440    etc. The remaining portion of the local submatrix [m x (N-n)]
4441    constitute the OFF-DIAGONAL portion. The example below better
4442    illustrates this concept.
4443 
4444    For a square global matrix we define each processor's diagonal portion
4445    to be its local rows and the corresponding columns (a square submatrix);
4446    each processor's off-diagonal portion encompasses the remainder of the
4447    local matrix (a rectangular submatrix).
4448 
4449    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4450 
4451    When calling this routine with a single process communicator, a matrix of
4452    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4453    type of communicator, use the construction mechanism
4454 .vb
4455      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4456 .ve
4457 
4458 $     MatCreate(...,&A);
4459 $     MatSetType(A,MATMPIAIJ);
4460 $     MatSetSizes(A, m,n,M,N);
4461 $     MatMPIAIJSetPreallocation(A,...);
4462 
4463    By default, this format uses inodes (identical nodes) when possible.
4464    We search for consecutive rows with the same nonzero structure, thereby
4465    reusing matrix information to achieve increased efficiency.
4466 
4467    Options Database Keys:
4468 +  -mat_no_inode  - Do not use inodes
4469 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4470 
4471 
4472 
4473    Example usage:
4474 
4475    Consider the following 8x8 matrix with 34 non-zero values, that is
4476    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4477    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4478    as follows
4479 
4480 .vb
4481             1  2  0  |  0  3  0  |  0  4
4482     Proc0   0  5  6  |  7  0  0  |  8  0
4483             9  0 10  | 11  0  0  | 12  0
4484     -------------------------------------
4485            13  0 14  | 15 16 17  |  0  0
4486     Proc1   0 18  0  | 19 20 21  |  0  0
4487             0  0  0  | 22 23  0  | 24  0
4488     -------------------------------------
4489     Proc2  25 26 27  |  0  0 28  | 29  0
4490            30  0  0  | 31 32 33  |  0 34
4491 .ve
4492 
4493    This can be represented as a collection of submatrices as
4494 
4495 .vb
4496       A B C
4497       D E F
4498       G H I
4499 .ve
4500 
4501    Where the submatrices A,B,C are owned by proc0, D,E,F are
4502    owned by proc1, G,H,I are owned by proc2.
4503 
4504    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4505    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4506    The 'M','N' parameters are 8,8, and have the same values on all procs.
4507 
4508    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4509    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4510    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4511    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4512    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4513    matrix, ans [DF] as another SeqAIJ matrix.
4514 
4515    When d_nz, o_nz parameters are specified, d_nz storage elements are
4516    allocated for every row of the local diagonal submatrix, and o_nz
4517    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4518    One way to choose d_nz and o_nz is to use the max nonzerors per local
4519    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4520    In this case, the values of d_nz,o_nz are
4521 .vb
4522      proc0 : dnz = 2, o_nz = 2
4523      proc1 : dnz = 3, o_nz = 2
4524      proc2 : dnz = 1, o_nz = 4
4525 .ve
4526    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4527    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4528    for proc3. i.e we are using 12+15+10=37 storage locations to store
4529    34 values.
4530 
4531    When d_nnz, o_nnz parameters are specified, the storage is specified
4532    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4533    In the above case the values for d_nnz,o_nnz are
4534 .vb
4535      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4536      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4537      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4538 .ve
4539    Here the space allocated is sum of all the above values i.e 34, and
4540    hence pre-allocation is perfect.
4541 
4542    Level: intermediate
4543 
4544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4545           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4546 @*/
4547 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4548 {
4549   PetscErrorCode ierr;
4550   PetscMPIInt    size;
4551 
4552   PetscFunctionBegin;
4553   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4554   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4556   if (size > 1) {
4557     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4558     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4559   } else {
4560     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4561     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4562   }
4563   PetscFunctionReturn(0);
4564 }
4565 
4566 /*@C
4567   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4568 
4569   Not collective
4570 
4571   Input Parameter:
4572 . A - The MPIAIJ matrix
4573 
4574   Output Parameters:
4575 + Ad - The local diagonal block as a SeqAIJ matrix
4576 . Ao - The local off-diagonal block as a SeqAIJ matrix
4577 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4578 
4579   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4580   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4581   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4582   local column numbers to global column numbers in the original matrix.
4583 
4584   Level: intermediate
4585 
4586 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4587 @*/
4588 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4589 {
4590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4591   PetscBool      flg;
4592   PetscErrorCode ierr;
4593 
4594   PetscFunctionBegin;
4595   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4596   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4597   if (Ad)     *Ad     = a->A;
4598   if (Ao)     *Ao     = a->B;
4599   if (colmap) *colmap = a->garray;
4600   PetscFunctionReturn(0);
4601 }
4602 
4603 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4604 {
4605   PetscErrorCode ierr;
4606   PetscInt       m,N,i,rstart,nnz,Ii;
4607   PetscInt       *indx;
4608   PetscScalar    *values;
4609 
4610   PetscFunctionBegin;
4611   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4612   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4613     PetscInt       *dnz,*onz,sum,bs,cbs;
4614 
4615     if (n == PETSC_DECIDE) {
4616       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4617     }
4618     /* Check sum(n) = N */
4619     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4620     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4621 
4622     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4623     rstart -= m;
4624 
4625     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4626     for (i=0; i<m; i++) {
4627       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4628       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4629       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4630     }
4631 
4632     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4633     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4634     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4635     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4636     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4637     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4638     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4639     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4640   }
4641 
4642   /* numeric phase */
4643   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4644   for (i=0; i<m; i++) {
4645     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4646     Ii   = i + rstart;
4647     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4648     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4649   }
4650   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4651   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
4654 
4655 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4656 {
4657   PetscErrorCode    ierr;
4658   PetscMPIInt       rank;
4659   PetscInt          m,N,i,rstart,nnz;
4660   size_t            len;
4661   const PetscInt    *indx;
4662   PetscViewer       out;
4663   char              *name;
4664   Mat               B;
4665   const PetscScalar *values;
4666 
4667   PetscFunctionBegin;
4668   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4669   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4670   /* Should this be the type of the diagonal block of A? */
4671   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4672   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4673   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4674   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4675   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4676   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4677   for (i=0; i<m; i++) {
4678     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4679     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4680     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4681   }
4682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4684 
4685   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4686   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4687   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4688   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4689   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4690   ierr = PetscFree(name);CHKERRQ(ierr);
4691   ierr = MatView(B,out);CHKERRQ(ierr);
4692   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4693   ierr = MatDestroy(&B);CHKERRQ(ierr);
4694   PetscFunctionReturn(0);
4695 }
4696 
4697 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4698 {
4699   PetscErrorCode      ierr;
4700   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4701 
4702   PetscFunctionBegin;
4703   if (!merge) PetscFunctionReturn(0);
4704   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4705   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4706   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4707   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4708   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4709   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4710   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4711   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4712   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4713   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4714   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4715   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4716   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4717   ierr = PetscFree(merge);CHKERRQ(ierr);
4718   PetscFunctionReturn(0);
4719 }
4720 
4721 #include <../src/mat/utils/freespace.h>
4722 #include <petscbt.h>
4723 
4724 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4725 {
4726   PetscErrorCode      ierr;
4727   MPI_Comm            comm;
4728   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4729   PetscMPIInt         size,rank,taga,*len_s;
4730   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4731   PetscInt            proc,m;
4732   PetscInt            **buf_ri,**buf_rj;
4733   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4734   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4735   MPI_Request         *s_waits,*r_waits;
4736   MPI_Status          *status;
4737   MatScalar           *aa=a->a;
4738   MatScalar           **abuf_r,*ba_i;
4739   Mat_Merge_SeqsToMPI *merge;
4740   PetscContainer      container;
4741 
4742   PetscFunctionBegin;
4743   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4744   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4745 
4746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4747   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4748 
4749   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4750   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4751   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4752 
4753   bi     = merge->bi;
4754   bj     = merge->bj;
4755   buf_ri = merge->buf_ri;
4756   buf_rj = merge->buf_rj;
4757 
4758   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4759   owners = merge->rowmap->range;
4760   len_s  = merge->len_s;
4761 
4762   /* send and recv matrix values */
4763   /*-----------------------------*/
4764   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4765   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4766 
4767   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4768   for (proc=0,k=0; proc<size; proc++) {
4769     if (!len_s[proc]) continue;
4770     i    = owners[proc];
4771     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4772     k++;
4773   }
4774 
4775   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4776   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4777   ierr = PetscFree(status);CHKERRQ(ierr);
4778 
4779   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4780   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4781 
4782   /* insert mat values of mpimat */
4783   /*----------------------------*/
4784   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4785   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4786 
4787   for (k=0; k<merge->nrecv; k++) {
4788     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4789     nrows       = *(buf_ri_k[k]);
4790     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4791     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4792   }
4793 
4794   /* set values of ba */
4795   m = merge->rowmap->n;
4796   for (i=0; i<m; i++) {
4797     arow = owners[rank] + i;
4798     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4799     bnzi = bi[i+1] - bi[i];
4800     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4801 
4802     /* add local non-zero vals of this proc's seqmat into ba */
4803     anzi   = ai[arow+1] - ai[arow];
4804     aj     = a->j + ai[arow];
4805     aa     = a->a + ai[arow];
4806     nextaj = 0;
4807     for (j=0; nextaj<anzi; j++) {
4808       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4809         ba_i[j] += aa[nextaj++];
4810       }
4811     }
4812 
4813     /* add received vals into ba */
4814     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4815       /* i-th row */
4816       if (i == *nextrow[k]) {
4817         anzi   = *(nextai[k]+1) - *nextai[k];
4818         aj     = buf_rj[k] + *(nextai[k]);
4819         aa     = abuf_r[k] + *(nextai[k]);
4820         nextaj = 0;
4821         for (j=0; nextaj<anzi; j++) {
4822           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4823             ba_i[j] += aa[nextaj++];
4824           }
4825         }
4826         nextrow[k]++; nextai[k]++;
4827       }
4828     }
4829     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4830   }
4831   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4832   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4833 
4834   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4835   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4836   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4837   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4838   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4839   PetscFunctionReturn(0);
4840 }
4841 
4842 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4843 {
4844   PetscErrorCode      ierr;
4845   Mat                 B_mpi;
4846   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4847   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4848   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4849   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4850   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4851   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4852   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4853   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4854   MPI_Status          *status;
4855   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4856   PetscBT             lnkbt;
4857   Mat_Merge_SeqsToMPI *merge;
4858   PetscContainer      container;
4859 
4860   PetscFunctionBegin;
4861   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4862 
4863   /* make sure it is a PETSc comm */
4864   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4865   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4866   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4867 
4868   ierr = PetscNew(&merge);CHKERRQ(ierr);
4869   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4870 
4871   /* determine row ownership */
4872   /*---------------------------------------------------------*/
4873   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4874   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4875   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4876   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4877   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4878   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4879   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4880 
4881   m      = merge->rowmap->n;
4882   owners = merge->rowmap->range;
4883 
4884   /* determine the number of messages to send, their lengths */
4885   /*---------------------------------------------------------*/
4886   len_s = merge->len_s;
4887 
4888   len          = 0; /* length of buf_si[] */
4889   merge->nsend = 0;
4890   for (proc=0; proc<size; proc++) {
4891     len_si[proc] = 0;
4892     if (proc == rank) {
4893       len_s[proc] = 0;
4894     } else {
4895       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4896       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4897     }
4898     if (len_s[proc]) {
4899       merge->nsend++;
4900       nrows = 0;
4901       for (i=owners[proc]; i<owners[proc+1]; i++) {
4902         if (ai[i+1] > ai[i]) nrows++;
4903       }
4904       len_si[proc] = 2*(nrows+1);
4905       len         += len_si[proc];
4906     }
4907   }
4908 
4909   /* determine the number and length of messages to receive for ij-structure */
4910   /*-------------------------------------------------------------------------*/
4911   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4912   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4913 
4914   /* post the Irecv of j-structure */
4915   /*-------------------------------*/
4916   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4917   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4918 
4919   /* post the Isend of j-structure */
4920   /*--------------------------------*/
4921   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4922 
4923   for (proc=0, k=0; proc<size; proc++) {
4924     if (!len_s[proc]) continue;
4925     i    = owners[proc];
4926     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4927     k++;
4928   }
4929 
4930   /* receives and sends of j-structure are complete */
4931   /*------------------------------------------------*/
4932   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4933   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4934 
4935   /* send and recv i-structure */
4936   /*---------------------------*/
4937   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4938   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4939 
4940   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4941   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4942   for (proc=0,k=0; proc<size; proc++) {
4943     if (!len_s[proc]) continue;
4944     /* form outgoing message for i-structure:
4945          buf_si[0]:                 nrows to be sent
4946                [1:nrows]:           row index (global)
4947                [nrows+1:2*nrows+1]: i-structure index
4948     */
4949     /*-------------------------------------------*/
4950     nrows       = len_si[proc]/2 - 1;
4951     buf_si_i    = buf_si + nrows+1;
4952     buf_si[0]   = nrows;
4953     buf_si_i[0] = 0;
4954     nrows       = 0;
4955     for (i=owners[proc]; i<owners[proc+1]; i++) {
4956       anzi = ai[i+1] - ai[i];
4957       if (anzi) {
4958         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4959         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4960         nrows++;
4961       }
4962     }
4963     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4964     k++;
4965     buf_si += len_si[proc];
4966   }
4967 
4968   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4969   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4970 
4971   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4972   for (i=0; i<merge->nrecv; i++) {
4973     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4974   }
4975 
4976   ierr = PetscFree(len_si);CHKERRQ(ierr);
4977   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4978   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4979   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4980   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4981   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4982   ierr = PetscFree(status);CHKERRQ(ierr);
4983 
4984   /* compute a local seq matrix in each processor */
4985   /*----------------------------------------------*/
4986   /* allocate bi array and free space for accumulating nonzero column info */
4987   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4988   bi[0] = 0;
4989 
4990   /* create and initialize a linked list */
4991   nlnk = N+1;
4992   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4993 
4994   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4995   len  = ai[owners[rank+1]] - ai[owners[rank]];
4996   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4997 
4998   current_space = free_space;
4999 
5000   /* determine symbolic info for each local row */
5001   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5002 
5003   for (k=0; k<merge->nrecv; k++) {
5004     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5005     nrows       = *buf_ri_k[k];
5006     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5007     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5008   }
5009 
5010   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5011   len  = 0;
5012   for (i=0; i<m; i++) {
5013     bnzi = 0;
5014     /* add local non-zero cols of this proc's seqmat into lnk */
5015     arow  = owners[rank] + i;
5016     anzi  = ai[arow+1] - ai[arow];
5017     aj    = a->j + ai[arow];
5018     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5019     bnzi += nlnk;
5020     /* add received col data into lnk */
5021     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5022       if (i == *nextrow[k]) { /* i-th row */
5023         anzi  = *(nextai[k]+1) - *nextai[k];
5024         aj    = buf_rj[k] + *nextai[k];
5025         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5026         bnzi += nlnk;
5027         nextrow[k]++; nextai[k]++;
5028       }
5029     }
5030     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5031 
5032     /* if free space is not available, make more free space */
5033     if (current_space->local_remaining<bnzi) {
5034       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5035       nspacedouble++;
5036     }
5037     /* copy data into free space, then initialize lnk */
5038     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5039     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5040 
5041     current_space->array           += bnzi;
5042     current_space->local_used      += bnzi;
5043     current_space->local_remaining -= bnzi;
5044 
5045     bi[i+1] = bi[i] + bnzi;
5046   }
5047 
5048   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5049 
5050   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5051   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5052   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5053 
5054   /* create symbolic parallel matrix B_mpi */
5055   /*---------------------------------------*/
5056   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5057   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5058   if (n==PETSC_DECIDE) {
5059     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5060   } else {
5061     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5062   }
5063   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5064   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5065   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5066   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5067   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5068 
5069   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5070   B_mpi->assembled  = PETSC_FALSE;
5071   merge->bi         = bi;
5072   merge->bj         = bj;
5073   merge->buf_ri     = buf_ri;
5074   merge->buf_rj     = buf_rj;
5075   merge->coi        = NULL;
5076   merge->coj        = NULL;
5077   merge->owners_co  = NULL;
5078 
5079   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5080 
5081   /* attach the supporting struct to B_mpi for reuse */
5082   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5083   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5084   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5085   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5086   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5087   *mpimat = B_mpi;
5088 
5089   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
5092 
5093 /*@C
5094       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5095                  matrices from each processor
5096 
5097     Collective
5098 
5099    Input Parameters:
5100 +    comm - the communicators the parallel matrix will live on
5101 .    seqmat - the input sequential matrices
5102 .    m - number of local rows (or PETSC_DECIDE)
5103 .    n - number of local columns (or PETSC_DECIDE)
5104 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5105 
5106    Output Parameter:
5107 .    mpimat - the parallel matrix generated
5108 
5109     Level: advanced
5110 
5111    Notes:
5112      The dimensions of the sequential matrix in each processor MUST be the same.
5113      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5114      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5115 @*/
5116 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5117 {
5118   PetscErrorCode ierr;
5119   PetscMPIInt    size;
5120 
5121   PetscFunctionBegin;
5122   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5123   if (size == 1) {
5124     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5125     if (scall == MAT_INITIAL_MATRIX) {
5126       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5127     } else {
5128       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5129     }
5130     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5131     PetscFunctionReturn(0);
5132   }
5133   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5134   if (scall == MAT_INITIAL_MATRIX) {
5135     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5136   }
5137   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5138   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5139   PetscFunctionReturn(0);
5140 }
5141 
5142 /*@
5143      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5144           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5145           with MatGetSize()
5146 
5147     Not Collective
5148 
5149    Input Parameters:
5150 +    A - the matrix
5151 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5152 
5153    Output Parameter:
5154 .    A_loc - the local sequential matrix generated
5155 
5156     Level: developer
5157 
5158    Notes:
5159      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5160      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5161      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5162      modify the values of the returned A_loc.
5163 
5164 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5165 
5166 @*/
5167 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5168 {
5169   PetscErrorCode ierr;
5170   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5171   Mat_SeqAIJ     *mat,*a,*b;
5172   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5173   MatScalar      *aa,*ba,*cam;
5174   PetscScalar    *ca;
5175   PetscMPIInt    size;
5176   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5177   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5178   PetscBool      match;
5179 
5180   PetscFunctionBegin;
5181   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5182   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5183   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5184   if (size == 1) {
5185     if (scall == MAT_INITIAL_MATRIX) {
5186       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5187       *A_loc = mpimat->A;
5188     } else if (scall == MAT_REUSE_MATRIX) {
5189       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5190     }
5191     PetscFunctionReturn(0);
5192   }
5193 
5194   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5195   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5196   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5197   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5198   aa = a->a; ba = b->a;
5199   if (scall == MAT_INITIAL_MATRIX) {
5200     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5201     ci[0] = 0;
5202     for (i=0; i<am; i++) {
5203       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5204     }
5205     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5206     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5207     k    = 0;
5208     for (i=0; i<am; i++) {
5209       ncols_o = bi[i+1] - bi[i];
5210       ncols_d = ai[i+1] - ai[i];
5211       /* off-diagonal portion of A */
5212       for (jo=0; jo<ncols_o; jo++) {
5213         col = cmap[*bj];
5214         if (col >= cstart) break;
5215         cj[k]   = col; bj++;
5216         ca[k++] = *ba++;
5217       }
5218       /* diagonal portion of A */
5219       for (j=0; j<ncols_d; j++) {
5220         cj[k]   = cstart + *aj++;
5221         ca[k++] = *aa++;
5222       }
5223       /* off-diagonal portion of A */
5224       for (j=jo; j<ncols_o; j++) {
5225         cj[k]   = cmap[*bj++];
5226         ca[k++] = *ba++;
5227       }
5228     }
5229     /* put together the new matrix */
5230     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5231     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5232     /* Since these are PETSc arrays, change flags to free them as necessary. */
5233     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5234     mat->free_a  = PETSC_TRUE;
5235     mat->free_ij = PETSC_TRUE;
5236     mat->nonew   = 0;
5237   } else if (scall == MAT_REUSE_MATRIX) {
5238     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5239     ci = mat->i; cj = mat->j; cam = mat->a;
5240     for (i=0; i<am; i++) {
5241       /* off-diagonal portion of A */
5242       ncols_o = bi[i+1] - bi[i];
5243       for (jo=0; jo<ncols_o; jo++) {
5244         col = cmap[*bj];
5245         if (col >= cstart) break;
5246         *cam++ = *ba++; bj++;
5247       }
5248       /* diagonal portion of A */
5249       ncols_d = ai[i+1] - ai[i];
5250       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5251       /* off-diagonal portion of A */
5252       for (j=jo; j<ncols_o; j++) {
5253         *cam++ = *ba++; bj++;
5254       }
5255     }
5256   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5257   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
5260 
5261 /*@C
5262      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5263 
5264     Not Collective
5265 
5266    Input Parameters:
5267 +    A - the matrix
5268 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5269 -    row, col - index sets of rows and columns to extract (or NULL)
5270 
5271    Output Parameter:
5272 .    A_loc - the local sequential matrix generated
5273 
5274     Level: developer
5275 
5276 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5277 
5278 @*/
5279 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5280 {
5281   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5282   PetscErrorCode ierr;
5283   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5284   IS             isrowa,iscola;
5285   Mat            *aloc;
5286   PetscBool      match;
5287 
5288   PetscFunctionBegin;
5289   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5290   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5291   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5292   if (!row) {
5293     start = A->rmap->rstart; end = A->rmap->rend;
5294     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5295   } else {
5296     isrowa = *row;
5297   }
5298   if (!col) {
5299     start = A->cmap->rstart;
5300     cmap  = a->garray;
5301     nzA   = a->A->cmap->n;
5302     nzB   = a->B->cmap->n;
5303     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5304     ncols = 0;
5305     for (i=0; i<nzB; i++) {
5306       if (cmap[i] < start) idx[ncols++] = cmap[i];
5307       else break;
5308     }
5309     imark = i;
5310     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5311     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5312     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5313   } else {
5314     iscola = *col;
5315   }
5316   if (scall != MAT_INITIAL_MATRIX) {
5317     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5318     aloc[0] = *A_loc;
5319   }
5320   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5321   if (!col) { /* attach global id of condensed columns */
5322     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5323   }
5324   *A_loc = aloc[0];
5325   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5326   if (!row) {
5327     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5328   }
5329   if (!col) {
5330     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5331   }
5332   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5333   PetscFunctionReturn(0);
5334 }
5335 
5336 /*
5337  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5338  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5339  * on a global size.
5340  * */
5341 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5342 {
5343   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5344   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5345   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5346   PetscMPIInt              owner;
5347   PetscSFNode              *iremote,*oiremote;
5348   const PetscInt           *lrowindices;
5349   PetscErrorCode           ierr;
5350   PetscSF                  sf,osf;
5351   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5352   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5353   MPI_Comm                 comm;
5354   ISLocalToGlobalMapping   mapping;
5355 
5356   PetscFunctionBegin;
5357   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5358   /* plocalsize is the number of roots
5359    * nrows is the number of leaves
5360    * */
5361   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5362   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5363   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5364   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5365   for (i=0;i<nrows;i++) {
5366     /* Find a remote index and an owner for a row
5367      * The row could be local or remote
5368      * */
5369     owner = 0;
5370     lidx  = 0;
5371     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5372     iremote[i].index = lidx;
5373     iremote[i].rank  = owner;
5374   }
5375   /* Create SF to communicate how many nonzero columns for each row */
5376   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5377   /* SF will figure out the number of nonzero colunms for each row, and their
5378    * offsets
5379    * */
5380   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5381   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5382   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5383 
5384   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5385   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5386   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5387   roffsets[0] = 0;
5388   roffsets[1] = 0;
5389   for (i=0;i<plocalsize;i++) {
5390     /* diag */
5391     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5392     /* off diag */
5393     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5394     /* compute offsets so that we relative location for each row */
5395     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5396     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5397   }
5398   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5399   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5400   /* 'r' means root, and 'l' means leaf */
5401   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5402   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5403   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5404   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5405   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5406   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5407   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5408   dntotalcols = 0;
5409   ontotalcols = 0;
5410   ncol = 0;
5411   for (i=0;i<nrows;i++) {
5412     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5413     ncol = PetscMax(pnnz[i],ncol);
5414     /* diag */
5415     dntotalcols += nlcols[i*2+0];
5416     /* off diag */
5417     ontotalcols += nlcols[i*2+1];
5418   }
5419   /* We do not need to figure the right number of columns
5420    * since all the calculations will be done by going through the raw data
5421    * */
5422   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5423   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5424   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5425   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5426   /* diag */
5427   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5428   /* off diag */
5429   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5430   /* diag */
5431   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5432   /* off diag */
5433   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5434   dntotalcols = 0;
5435   ontotalcols = 0;
5436   ntotalcols  = 0;
5437   for (i=0;i<nrows;i++) {
5438     owner = 0;
5439     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5440     /* Set iremote for diag matrix */
5441     for (j=0;j<nlcols[i*2+0];j++) {
5442       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5443       iremote[dntotalcols].rank    = owner;
5444       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5445       ilocal[dntotalcols++]        = ntotalcols++;
5446     }
5447     /* off diag */
5448     for (j=0;j<nlcols[i*2+1];j++) {
5449       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5450       oiremote[ontotalcols].rank    = owner;
5451       oilocal[ontotalcols++]        = ntotalcols++;
5452     }
5453   }
5454   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5455   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5456   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5457   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5458   /* P serves as roots and P_oth is leaves
5459    * Diag matrix
5460    * */
5461   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5462   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5463   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5464 
5465   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5466   /* Off diag */
5467   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5468   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5469   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5470   /* We operate on the matrix internal data for saving memory */
5471   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5472   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5473   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5474   /* Convert to global indices for diag matrix */
5475   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5476   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5477   /* We want P_oth store global indices */
5478   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5479   /* Use memory scalable approach */
5480   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5481   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5482   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5483   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5484   /* Convert back to local indices */
5485   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5486   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5487   nout = 0;
5488   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5489   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5490   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5491   /* Exchange values */
5492   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5493   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5494   /* Stop PETSc from shrinking memory */
5495   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5496   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5498   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5499   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5500   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5501   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5502   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5503   PetscFunctionReturn(0);
5504 }
5505 
5506 /*
5507  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5508  * This supports MPIAIJ and MAIJ
5509  * */
5510 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5511 {
5512   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5513   Mat_SeqAIJ            *p_oth;
5514   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5515   IS                    rows,map;
5516   PetscHMapI            hamp;
5517   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5518   MPI_Comm              comm;
5519   PetscSF               sf,osf;
5520   PetscBool             has;
5521   PetscErrorCode        ierr;
5522 
5523   PetscFunctionBegin;
5524   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5525   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5526   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5527    *  and then create a submatrix (that often is an overlapping matrix)
5528    * */
5529   if (reuse == MAT_INITIAL_MATRIX) {
5530     /* Use a hash table to figure out unique keys */
5531     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5532     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5533     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5534     count = 0;
5535     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5536     for (i=0;i<a->B->cmap->n;i++) {
5537       key  = a->garray[i]/dof;
5538       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5539       if (!has) {
5540         mapping[i] = count;
5541         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5542       } else {
5543         /* Current 'i' has the same value the previous step */
5544         mapping[i] = count-1;
5545       }
5546     }
5547     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5548     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5549     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5550     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5551     off = 0;
5552     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5553     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5554     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5555     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5556     /* In case, the matrix was already created but users want to recreate the matrix */
5557     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5558     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5559     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5560     ierr = ISDestroy(&map);CHKERRQ(ierr);
5561     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5562   } else if (reuse == MAT_REUSE_MATRIX) {
5563     /* If matrix was already created, we simply update values using SF objects
5564      * that as attached to the matrix ealier.
5565      *  */
5566     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5567     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5568     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5569     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5570     /* Update values in place */
5571     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5572     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5573     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5574     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5575   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5576   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5577   PetscFunctionReturn(0);
5578 }
5579 
5580 /*@C
5581     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5582 
5583     Collective on Mat
5584 
5585    Input Parameters:
5586 +    A,B - the matrices in mpiaij format
5587 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5588 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5589 
5590    Output Parameter:
5591 +    rowb, colb - index sets of rows and columns of B to extract
5592 -    B_seq - the sequential matrix generated
5593 
5594     Level: developer
5595 
5596 @*/
5597 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5598 {
5599   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5600   PetscErrorCode ierr;
5601   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5602   IS             isrowb,iscolb;
5603   Mat            *bseq=NULL;
5604 
5605   PetscFunctionBegin;
5606   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5607     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5608   }
5609   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5610 
5611   if (scall == MAT_INITIAL_MATRIX) {
5612     start = A->cmap->rstart;
5613     cmap  = a->garray;
5614     nzA   = a->A->cmap->n;
5615     nzB   = a->B->cmap->n;
5616     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5617     ncols = 0;
5618     for (i=0; i<nzB; i++) {  /* row < local row index */
5619       if (cmap[i] < start) idx[ncols++] = cmap[i];
5620       else break;
5621     }
5622     imark = i;
5623     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5624     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5625     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5626     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5627   } else {
5628     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5629     isrowb  = *rowb; iscolb = *colb;
5630     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5631     bseq[0] = *B_seq;
5632   }
5633   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5634   *B_seq = bseq[0];
5635   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5636   if (!rowb) {
5637     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5638   } else {
5639     *rowb = isrowb;
5640   }
5641   if (!colb) {
5642     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5643   } else {
5644     *colb = iscolb;
5645   }
5646   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5647   PetscFunctionReturn(0);
5648 }
5649 
5650 /*
5651     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5652     of the OFF-DIAGONAL portion of local A
5653 
5654     Collective on Mat
5655 
5656    Input Parameters:
5657 +    A,B - the matrices in mpiaij format
5658 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5659 
5660    Output Parameter:
5661 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5662 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5663 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5664 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5665 
5666     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5667      for this matrix. This is not desirable..
5668 
5669     Level: developer
5670 
5671 */
5672 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5673 {
5674   PetscErrorCode         ierr;
5675   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5676   Mat_SeqAIJ             *b_oth;
5677   VecScatter             ctx;
5678   MPI_Comm               comm;
5679   const PetscMPIInt      *rprocs,*sprocs;
5680   const PetscInt         *srow,*rstarts,*sstarts;
5681   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5682   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5683   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5684   MPI_Request            *rwaits = NULL,*swaits = NULL;
5685   MPI_Status             rstatus;
5686   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5687 
5688   PetscFunctionBegin;
5689   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5691 
5692   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5693     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5694   }
5695   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5696   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5697 
5698   if (size == 1) {
5699     startsj_s = NULL;
5700     bufa_ptr  = NULL;
5701     *B_oth    = NULL;
5702     PetscFunctionReturn(0);
5703   }
5704 
5705   ctx = a->Mvctx;
5706   tag = ((PetscObject)ctx)->tag;
5707 
5708   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5709   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5710   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5711   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5712   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5713   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5714   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5715 
5716   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5717   if (scall == MAT_INITIAL_MATRIX) {
5718     /* i-array */
5719     /*---------*/
5720     /*  post receives */
5721     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5722     for (i=0; i<nrecvs; i++) {
5723       rowlen = rvalues + rstarts[i]*rbs;
5724       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5725       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5726     }
5727 
5728     /* pack the outgoing message */
5729     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5730 
5731     sstartsj[0] = 0;
5732     rstartsj[0] = 0;
5733     len         = 0; /* total length of j or a array to be sent */
5734     if (nsends) {
5735       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5736       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5737     }
5738     for (i=0; i<nsends; i++) {
5739       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5740       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5741       for (j=0; j<nrows; j++) {
5742         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5743         for (l=0; l<sbs; l++) {
5744           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5745 
5746           rowlen[j*sbs+l] = ncols;
5747 
5748           len += ncols;
5749           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5750         }
5751         k++;
5752       }
5753       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5754 
5755       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5756     }
5757     /* recvs and sends of i-array are completed */
5758     i = nrecvs;
5759     while (i--) {
5760       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5761     }
5762     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5763     ierr = PetscFree(svalues);CHKERRQ(ierr);
5764 
5765     /* allocate buffers for sending j and a arrays */
5766     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5767     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5768 
5769     /* create i-array of B_oth */
5770     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5771 
5772     b_othi[0] = 0;
5773     len       = 0; /* total length of j or a array to be received */
5774     k         = 0;
5775     for (i=0; i<nrecvs; i++) {
5776       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5777       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5778       for (j=0; j<nrows; j++) {
5779         b_othi[k+1] = b_othi[k] + rowlen[j];
5780         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5781         k++;
5782       }
5783       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5784     }
5785     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5786 
5787     /* allocate space for j and a arrrays of B_oth */
5788     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5789     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5790 
5791     /* j-array */
5792     /*---------*/
5793     /*  post receives of j-array */
5794     for (i=0; i<nrecvs; i++) {
5795       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5796       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5797     }
5798 
5799     /* pack the outgoing message j-array */
5800     if (nsends) k = sstarts[0];
5801     for (i=0; i<nsends; i++) {
5802       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5803       bufJ  = bufj+sstartsj[i];
5804       for (j=0; j<nrows; j++) {
5805         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5806         for (ll=0; ll<sbs; ll++) {
5807           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5808           for (l=0; l<ncols; l++) {
5809             *bufJ++ = cols[l];
5810           }
5811           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5812         }
5813       }
5814       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5815     }
5816 
5817     /* recvs and sends of j-array are completed */
5818     i = nrecvs;
5819     while (i--) {
5820       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5821     }
5822     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5823   } else if (scall == MAT_REUSE_MATRIX) {
5824     sstartsj = *startsj_s;
5825     rstartsj = *startsj_r;
5826     bufa     = *bufa_ptr;
5827     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5828     b_otha   = b_oth->a;
5829   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5830 
5831   /* a-array */
5832   /*---------*/
5833   /*  post receives of a-array */
5834   for (i=0; i<nrecvs; i++) {
5835     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5836     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5837   }
5838 
5839   /* pack the outgoing message a-array */
5840   if (nsends) k = sstarts[0];
5841   for (i=0; i<nsends; i++) {
5842     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5843     bufA  = bufa+sstartsj[i];
5844     for (j=0; j<nrows; j++) {
5845       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5846       for (ll=0; ll<sbs; ll++) {
5847         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5848         for (l=0; l<ncols; l++) {
5849           *bufA++ = vals[l];
5850         }
5851         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5852       }
5853     }
5854     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5855   }
5856   /* recvs and sends of a-array are completed */
5857   i = nrecvs;
5858   while (i--) {
5859     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5860   }
5861   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5862   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5863 
5864   if (scall == MAT_INITIAL_MATRIX) {
5865     /* put together the new matrix */
5866     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5867 
5868     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5869     /* Since these are PETSc arrays, change flags to free them as necessary. */
5870     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5871     b_oth->free_a  = PETSC_TRUE;
5872     b_oth->free_ij = PETSC_TRUE;
5873     b_oth->nonew   = 0;
5874 
5875     ierr = PetscFree(bufj);CHKERRQ(ierr);
5876     if (!startsj_s || !bufa_ptr) {
5877       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5878       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5879     } else {
5880       *startsj_s = sstartsj;
5881       *startsj_r = rstartsj;
5882       *bufa_ptr  = bufa;
5883     }
5884   }
5885 
5886   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5887   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5889   PetscFunctionReturn(0);
5890 }
5891 
5892 /*@C
5893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5894 
5895   Not Collective
5896 
5897   Input Parameters:
5898 . A - The matrix in mpiaij format
5899 
5900   Output Parameter:
5901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5902 . colmap - A map from global column index to local index into lvec
5903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5904 
5905   Level: developer
5906 
5907 @*/
5908 #if defined(PETSC_USE_CTABLE)
5909 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5910 #else
5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5912 #endif
5913 {
5914   Mat_MPIAIJ *a;
5915 
5916   PetscFunctionBegin;
5917   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5918   PetscValidPointer(lvec, 2);
5919   PetscValidPointer(colmap, 3);
5920   PetscValidPointer(multScatter, 4);
5921   a = (Mat_MPIAIJ*) A->data;
5922   if (lvec) *lvec = a->lvec;
5923   if (colmap) *colmap = a->colmap;
5924   if (multScatter) *multScatter = a->Mvctx;
5925   PetscFunctionReturn(0);
5926 }
5927 
5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5931 #if defined(PETSC_HAVE_MKL_SPARSE)
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5933 #endif
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5936 #if defined(PETSC_HAVE_ELEMENTAL)
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5938 #endif
5939 #if defined(PETSC_HAVE_SCALAPACK)
5940 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5941 #endif
5942 #if defined(PETSC_HAVE_HYPRE)
5943 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 #if defined(PETSC_HAVE_CUDA)
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5947 #endif
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5949 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5950 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5951 
5952 /*
5953     Computes (B'*A')' since computing B*A directly is untenable
5954 
5955                n                       p                          p
5956         [             ]       [             ]         [                 ]
5957       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5958         [             ]       [             ]         [                 ]
5959 
5960 */
5961 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5962 {
5963   PetscErrorCode ierr;
5964   Mat            At,Bt,Ct;
5965 
5966   PetscFunctionBegin;
5967   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5968   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5969   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5970   ierr = MatDestroy(&At);CHKERRQ(ierr);
5971   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5972   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5973   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5974   PetscFunctionReturn(0);
5975 }
5976 
5977 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5978 {
5979   PetscErrorCode ierr;
5980   PetscBool      cisdense;
5981 
5982   PetscFunctionBegin;
5983   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5984   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5985   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5986   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5987   if (!cisdense) {
5988     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5989   }
5990   ierr = MatSetUp(C);CHKERRQ(ierr);
5991 
5992   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5993   PetscFunctionReturn(0);
5994 }
5995 
5996 /* ----------------------------------------------------------------*/
5997 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5998 {
5999   Mat_Product *product = C->product;
6000   Mat         A = product->A,B=product->B;
6001 
6002   PetscFunctionBegin;
6003   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6004     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6005 
6006   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6007   C->ops->productsymbolic = MatProductSymbolic_AB;
6008   PetscFunctionReturn(0);
6009 }
6010 
6011 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6012 {
6013   PetscErrorCode ierr;
6014   Mat_Product    *product = C->product;
6015 
6016   PetscFunctionBegin;
6017   if (product->type == MATPRODUCT_AB) {
6018     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6019   }
6020   PetscFunctionReturn(0);
6021 }
6022 /* ----------------------------------------------------------------*/
6023 
6024 /*MC
6025    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6026 
6027    Options Database Keys:
6028 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6029 
6030    Level: beginner
6031 
6032    Notes:
6033     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6034     in this case the values associated with the rows and columns one passes in are set to zero
6035     in the matrix
6036 
6037     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
6038     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6039 
6040 .seealso: MatCreateAIJ()
6041 M*/
6042 
6043 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6044 {
6045   Mat_MPIAIJ     *b;
6046   PetscErrorCode ierr;
6047   PetscMPIInt    size;
6048 
6049   PetscFunctionBegin;
6050   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6051 
6052   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6053   B->data       = (void*)b;
6054   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6055   B->assembled  = PETSC_FALSE;
6056   B->insertmode = NOT_SET_VALUES;
6057   b->size       = size;
6058 
6059   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6060 
6061   /* build cache for off array entries formed */
6062   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6063 
6064   b->donotstash  = PETSC_FALSE;
6065   b->colmap      = NULL;
6066   b->garray      = NULL;
6067   b->roworiented = PETSC_TRUE;
6068 
6069   /* stuff used for matrix vector multiply */
6070   b->lvec  = NULL;
6071   b->Mvctx = NULL;
6072 
6073   /* stuff for MatGetRow() */
6074   b->rowindices   = NULL;
6075   b->rowvalues    = NULL;
6076   b->getrowactive = PETSC_FALSE;
6077 
6078   /* flexible pointer used in CUSP/CUSPARSE classes */
6079   b->spptr = NULL;
6080 
6081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6091 #if defined(PETSC_HAVE_MKL_SPARSE)
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6093 #endif
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6097 #if defined(PETSC_HAVE_ELEMENTAL)
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6099 #endif
6100 #if defined(PETSC_HAVE_SCALAPACK)
6101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6102 #endif
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6105 #if defined(PETSC_HAVE_HYPRE)
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6108 #endif
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6111   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6112   PetscFunctionReturn(0);
6113 }
6114 
6115 /*@C
6116      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6117          and "off-diagonal" part of the matrix in CSR format.
6118 
6119    Collective
6120 
6121    Input Parameters:
6122 +  comm - MPI communicator
6123 .  m - number of local rows (Cannot be PETSC_DECIDE)
6124 .  n - This value should be the same as the local size used in creating the
6125        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6126        calculated if N is given) For square matrices n is almost always m.
6127 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6128 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6129 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6130 .   j - column indices
6131 .   a - matrix values
6132 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6133 .   oj - column indices
6134 -   oa - matrix values
6135 
6136    Output Parameter:
6137 .   mat - the matrix
6138 
6139    Level: advanced
6140 
6141    Notes:
6142        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6143        must free the arrays once the matrix has been destroyed and not before.
6144 
6145        The i and j indices are 0 based
6146 
6147        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6148 
6149        This sets local rows and cannot be used to set off-processor values.
6150 
6151        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6152        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6153        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6154        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6155        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6156        communication if it is known that only local entries will be set.
6157 
6158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6159           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6160 @*/
6161 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6162 {
6163   PetscErrorCode ierr;
6164   Mat_MPIAIJ     *maij;
6165 
6166   PetscFunctionBegin;
6167   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6168   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6169   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6170   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6171   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6172   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6173   maij = (Mat_MPIAIJ*) (*mat)->data;
6174 
6175   (*mat)->preallocated = PETSC_TRUE;
6176 
6177   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6178   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6179 
6180   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6181   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6182 
6183   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6184   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6185   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6186   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6187 
6188   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6189   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6190   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6191   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6192   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6193   PetscFunctionReturn(0);
6194 }
6195 
6196 /*
6197     Special version for direct calls from Fortran
6198 */
6199 #include <petsc/private/fortranimpl.h>
6200 
6201 /* Change these macros so can be used in void function */
6202 #undef CHKERRQ
6203 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6204 #undef SETERRQ2
6205 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6206 #undef SETERRQ3
6207 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6208 #undef SETERRQ
6209 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6210 
6211 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6215 #else
6216 #endif
6217 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6218 {
6219   Mat            mat  = *mmat;
6220   PetscInt       m    = *mm, n = *mn;
6221   InsertMode     addv = *maddv;
6222   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6223   PetscScalar    value;
6224   PetscErrorCode ierr;
6225 
6226   MatCheckPreallocated(mat,1);
6227   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6228   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6229   {
6230     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6231     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6232     PetscBool roworiented = aij->roworiented;
6233 
6234     /* Some Variables required in the macro */
6235     Mat        A                    = aij->A;
6236     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6237     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6238     MatScalar  *aa                  = a->a;
6239     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6240     Mat        B                    = aij->B;
6241     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6242     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6243     MatScalar  *ba                  = b->a;
6244     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6245      * cannot use "#if defined" inside a macro. */
6246     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6247 
6248     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6249     PetscInt  nonew = a->nonew;
6250     MatScalar *ap1,*ap2;
6251 
6252     PetscFunctionBegin;
6253     for (i=0; i<m; i++) {
6254       if (im[i] < 0) continue;
6255       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6256       if (im[i] >= rstart && im[i] < rend) {
6257         row      = im[i] - rstart;
6258         lastcol1 = -1;
6259         rp1      = aj + ai[row];
6260         ap1      = aa + ai[row];
6261         rmax1    = aimax[row];
6262         nrow1    = ailen[row];
6263         low1     = 0;
6264         high1    = nrow1;
6265         lastcol2 = -1;
6266         rp2      = bj + bi[row];
6267         ap2      = ba + bi[row];
6268         rmax2    = bimax[row];
6269         nrow2    = bilen[row];
6270         low2     = 0;
6271         high2    = nrow2;
6272 
6273         for (j=0; j<n; j++) {
6274           if (roworiented) value = v[i*n+j];
6275           else value = v[i+j*m];
6276           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6277           if (in[j] >= cstart && in[j] < cend) {
6278             col = in[j] - cstart;
6279             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6280 #if defined(PETSC_HAVE_DEVICE)
6281             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6282 #endif
6283           } else if (in[j] < 0) continue;
6284           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6285             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6286             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6287           } else {
6288             if (mat->was_assembled) {
6289               if (!aij->colmap) {
6290                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6291               }
6292 #if defined(PETSC_USE_CTABLE)
6293               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6294               col--;
6295 #else
6296               col = aij->colmap[in[j]] - 1;
6297 #endif
6298               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6299                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6300                 col  =  in[j];
6301                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6302                 B        = aij->B;
6303                 b        = (Mat_SeqAIJ*)B->data;
6304                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6305                 rp2      = bj + bi[row];
6306                 ap2      = ba + bi[row];
6307                 rmax2    = bimax[row];
6308                 nrow2    = bilen[row];
6309                 low2     = 0;
6310                 high2    = nrow2;
6311                 bm       = aij->B->rmap->n;
6312                 ba       = b->a;
6313                 inserted = PETSC_FALSE;
6314               }
6315             } else col = in[j];
6316             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6317 #if defined(PETSC_HAVE_DEVICE)
6318             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6319 #endif
6320           }
6321         }
6322       } else if (!aij->donotstash) {
6323         if (roworiented) {
6324           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6325         } else {
6326           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6327         }
6328       }
6329     }
6330   }
6331   PetscFunctionReturnVoid();
6332 }
6333