xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d60b7d5cda9a3148fab971d2544cf20a2ffedfa6)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
300       /* determine number diagonal and off-diagonal counts */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333       /* determine number diagonal and off-diagonal counts */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0*/
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it it is not scalable (each processor
425 has an order N integer array but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure LogFlops will slow dow the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800 
801   PetscFunctionBegin;
802   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
803 
804   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
805   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
806   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811 {
812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
813   PetscErrorCode ierr;
814   PetscMPIInt    n;
815   PetscInt       i,j,rstart,ncols,flg;
816   PetscInt       *row,*col;
817   PetscBool      other_disassembled;
818   PetscScalar    *val;
819 
820   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
821 
822   PetscFunctionBegin;
823   if (!aij->donotstash && !mat->nooffprocentries) {
824     while (1) {
825       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
826       if (!flg) break;
827 
828       for (i=0; i<n;) {
829         /* Now identify the consecutive vals belonging to the same row */
830         for (j=i,rstart=row[j]; j<n; j++) {
831           if (row[j] != rstart) break;
832         }
833         if (j < n) ncols = j-i;
834         else       ncols = n-i;
835         /* Now assemble all these values with a single function call */
836         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
837         i    = j;
838       }
839     }
840     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
841   }
842 #if defined(PETSC_HAVE_DEVICE)
843   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845   if (mat->boundtocpu) {
846     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
847     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
848   }
849 #endif
850   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
851   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
852 
853   /* determine if any processor has disassembled, if so we must
854      also disassemble ourself, in order that we may reassemble. */
855   /*
856      if nonzero structure of submatrix B cannot change then we know that
857      no processor disassembled thus we can skip this stuff
858   */
859   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
861     if (mat->was_assembled && !other_disassembled) {
862 #if defined(PETSC_HAVE_DEVICE)
863       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864 #endif
865       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
866     }
867   }
868   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
870   }
871   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
872 #if defined(PETSC_HAVE_DEVICE)
873   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874 #endif
875   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
876   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
877 
878   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
879 
880   aij->rowvalues = NULL;
881 
882   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
883 
884   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
888   }
889 #if defined(PETSC_HAVE_DEVICE)
890   mat->offloadmask = PETSC_OFFLOAD_BOTH;
891 #endif
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896 {
897   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
902   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
909   PetscObjectState sA, sB;
910   PetscInt        *lrows;
911   PetscInt         r, len;
912   PetscBool        cong, lch, gch;
913   PetscErrorCode   ierr;
914 
915   PetscFunctionBegin;
916   /* get locally owned rows */
917   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
918   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
919   /* fix right hand side if needed */
920   if (x && b) {
921     const PetscScalar *xx;
922     PetscScalar       *bb;
923 
924     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
926     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
927     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
929     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
930   }
931 
932   sA = mat->A->nonzerostate;
933   sB = mat->B->nonzerostate;
934 
935   if (diag != 0.0 && cong) {
936     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
937     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941     PetscInt   nnwA, nnwB;
942     PetscBool  nnzA, nnzB;
943 
944     nnwA = aijA->nonew;
945     nnwB = aijB->nonew;
946     nnzA = aijA->keepnonzeropattern;
947     nnzB = aijB->keepnonzeropattern;
948     if (!nnzA) {
949       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
950       aijA->nonew = 0;
951     }
952     if (!nnzB) {
953       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
954       aijB->nonew = 0;
955     }
956     /* Must zero here before the next loop */
957     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
959     for (r = 0; r < len; ++r) {
960       const PetscInt row = lrows[r] + A->rmap->rstart;
961       if (row >= A->cmap->N) continue;
962       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
963     }
964     aijA->nonew = nnwA;
965     aijB->nonew = nnwB;
966   } else {
967     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
969   }
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973 
974   /* reduce nonzerostate */
975   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   if (gch) A->nonzerostate++;
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982 {
983   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       n = A->rmap->n;
986   PetscInt          i,j,r,m,len = 0;
987   PetscInt          *lrows,*owners = A->rmap->range;
988   PetscMPIInt       p = 0;
989   PetscSFNode       *rrows;
990   PetscSF           sf;
991   const PetscScalar *xx;
992   PetscScalar       *bb,*mask;
993   Vec               xmask,lmask;
994   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
995   const PetscInt    *aj, *ii,*ridx;
996   PetscScalar       *aa;
997 
998   PetscFunctionBegin;
999   /* Create SF where leaves are input rows and roots are owned rows */
1000   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1001   for (r = 0; r < n; ++r) lrows[r] = -1;
1002   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1003   for (r = 0; r < N; ++r) {
1004     const PetscInt idx   = rows[r];
1005     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1008     }
1009     rrows[r].rank  = p;
1010     rrows[r].index = rows[r] - owners[p];
1011   }
1012   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1013   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1014   /* Collect flags for rows to be zeroed */
1015   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1016   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1017   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1018   /* Compress and put in row numbers */
1019   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1022   /* handle off diagonal part of matrix */
1023   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x && b) { /* this code is buggy when the row and column layout don't match */
1032     PetscBool cong;
1033 
1034     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1035     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040   }
1041   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<len; i++) {
1045     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1046   }
1047   /* loop over all elements of off process part of matrix zeroing removed columns*/
1048   if (aij->compressedrow.use) {
1049     m    = aij->compressedrow.nrows;
1050     ii   = aij->compressedrow.i;
1051     ridx = aij->compressedrow.rindex;
1052     for (i=0; i<m; i++) {
1053       n  = ii[i+1] - ii[i];
1054       aj = aij->j + ii[i];
1055       aa = aij->a + ii[i];
1056 
1057       for (j=0; j<n; j++) {
1058         if (PetscAbsScalar(mask[*aj])) {
1059           if (b) bb[*ridx] -= *aa*xx[*aj];
1060           *aa = 0.0;
1061         }
1062         aa++;
1063         aj++;
1064       }
1065       ridx++;
1066     }
1067   } else { /* do not use compressed row format */
1068     m = l->B->rmap->n;
1069     for (i=0; i<m; i++) {
1070       n  = ii[i+1] - ii[i];
1071       aj = aij->j + ii[i];
1072       aa = aij->a + ii[i];
1073       for (j=0; j<n; j++) {
1074         if (PetscAbsScalar(mask[*aj])) {
1075           if (b) bb[i] -= *aa*xx[*aj];
1076           *aa = 0.0;
1077         }
1078         aa++;
1079         aj++;
1080       }
1081     }
1082   }
1083   if (x && b) {
1084     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1085     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1086   }
1087   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1088   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1089   ierr = PetscFree(lrows);CHKERRQ(ierr);
1090 
1091   /* only change matrix nonzero state if pattern was allowed to be changed */
1092   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       nt;
1104   VecScatter     Mvctx = a->Mvctx;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129   PetscErrorCode ierr;
1130   VecScatter     Mvctx = a->Mvctx;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1141 {
1142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   /* do nondiagonal part */
1147   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1148   /* do local part */
1149   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1150   /* add partial results together */
1151   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscBool      lf;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168   /* Easy test: symmetric diagonal block */
1169   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1171   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1199 {
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1208 {
1209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   /* do nondiagonal part */
1214   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1215   /* do local part */
1216   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1217   /* add partial results together */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /*
1224   This only works correctly for square matrices where the subblock A->A is the
1225    diagonal block
1226 */
1227 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1228 {
1229   PetscErrorCode ierr;
1230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1231 
1232   PetscFunctionBegin;
1233   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1234   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1235   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1240 {
1241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1242   PetscErrorCode ierr;
1243 
1244   PetscFunctionBegin;
1245   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1246   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined(PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1276   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1277 
1278   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_CUDA)
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1290 #endif
1291 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1293 #endif
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1310 #if defined(PETSC_HAVE_MKL_SPARSE)
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1312 #endif
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1320 {
1321   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1322   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1323   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1324   const PetscInt    *garray = aij->garray;
1325   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1326   PetscInt          *rowlens;
1327   PetscInt          *colidxs;
1328   PetscScalar       *matvals;
1329   PetscErrorCode    ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1333 
1334   M  = mat->rmap->N;
1335   N  = mat->cmap->N;
1336   m  = mat->rmap->n;
1337   rs = mat->rmap->rstart;
1338   cs = mat->cmap->rstart;
1339   nz = A->nz + B->nz;
1340 
1341   /* write matrix header */
1342   header[0] = MAT_FILE_CLASSID;
1343   header[1] = M; header[2] = N; header[3] = nz;
1344   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1345   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1346 
1347   /* fill in and store row lengths  */
1348   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1349   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1350   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1351   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1352 
1353   /* fill in and store column indices */
1354   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1355   for (cnt=0, i=0; i<m; i++) {
1356     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1357       if (garray[B->j[jb]] > cs) break;
1358       colidxs[cnt++] = garray[B->j[jb]];
1359     }
1360     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1361       colidxs[cnt++] = A->j[ja] + cs;
1362     for (; jb<B->i[i+1]; jb++)
1363       colidxs[cnt++] = garray[B->j[jb]];
1364   }
1365   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1366   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1367   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1368 
1369   /* fill in and store nonzero values */
1370   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1371   for (cnt=0, i=0; i<m; i++) {
1372     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1373       if (garray[B->j[jb]] > cs) break;
1374       matvals[cnt++] = B->a[jb];
1375     }
1376     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1377       matvals[cnt++] = A->a[ja];
1378     for (; jb<B->i[i+1]; jb++)
1379       matvals[cnt++] = B->a[jb];
1380   }
1381   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1382   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1383   ierr = PetscFree(matvals);CHKERRQ(ierr);
1384 
1385   /* write block size option to the viewer's .info file */
1386   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #include <petscdraw.h>
1391 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1392 {
1393   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1394   PetscErrorCode    ierr;
1395   PetscMPIInt       rank = aij->rank,size = aij->size;
1396   PetscBool         isdraw,iascii,isbinary;
1397   PetscViewer       sviewer;
1398   PetscViewerFormat format;
1399 
1400   PetscFunctionBegin;
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1404   if (iascii) {
1405     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1406     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1407       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1408       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1409       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1410       for (i=0; i<(PetscInt)size; i++) {
1411         nmax = PetscMax(nmax,nz[i]);
1412         nmin = PetscMin(nmin,nz[i]);
1413         navg += nz[i];
1414       }
1415       ierr = PetscFree(nz);CHKERRQ(ierr);
1416       navg = navg/size;
1417       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1418       PetscFunctionReturn(0);
1419     }
1420     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1421     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1422       MatInfo   info;
1423       PetscBool inodes;
1424 
1425       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1426       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1427       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1429       if (!inodes) {
1430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1431                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1432       } else {
1433         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1434                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1435       }
1436       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1437       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1438       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1443       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1444       PetscFunctionReturn(0);
1445     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1446       PetscInt inodecount,inodelimit,*inodes;
1447       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1448       if (inodes) {
1449         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1450       } else {
1451         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1452       }
1453       PetscFunctionReturn(0);
1454     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1455       PetscFunctionReturn(0);
1456     }
1457   } else if (isbinary) {
1458     if (size == 1) {
1459       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1460       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1461     } else {
1462       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1463     }
1464     PetscFunctionReturn(0);
1465   } else if (iascii && size == 1) {
1466     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1468     PetscFunctionReturn(0);
1469   } else if (isdraw) {
1470     PetscDraw draw;
1471     PetscBool isnull;
1472     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1473     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1474     if (isnull) PetscFunctionReturn(0);
1475   }
1476 
1477   { /* assemble the entire matrix onto first processor */
1478     Mat A = NULL, Av;
1479     IS  isrow,iscol;
1480 
1481     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1482     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1483     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1484     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1485 /*  The commented code uses MatCreateSubMatrices instead */
1486 /*
1487     Mat *AA, A = NULL, Av;
1488     IS  isrow,iscol;
1489 
1490     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1491     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1492     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1493     if (!rank) {
1494        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1495        A    = AA[0];
1496        Av   = AA[0];
1497     }
1498     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1499 */
1500     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1501     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1502     /*
1503        Everyone has to call to draw the matrix since the graphics waits are
1504        synchronized across all processors that share the PetscDraw object
1505     */
1506     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1507     if (!rank) {
1508       if (((PetscObject)mat)->name) {
1509         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1510       }
1511       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1512     }
1513     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1514     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1515     ierr = MatDestroy(&A);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1521 {
1522   PetscErrorCode ierr;
1523   PetscBool      iascii,isdraw,issocket,isbinary;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1527   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1529   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1530   if (iascii || isdraw || isbinary || issocket) {
1531     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1537 {
1538   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1539   PetscErrorCode ierr;
1540   Vec            bb1 = NULL;
1541   PetscBool      hasop;
1542 
1543   PetscFunctionBegin;
1544   if (flag == SOR_APPLY_UPPER) {
1545     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1546     PetscFunctionReturn(0);
1547   }
1548 
1549   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1550     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1551   }
1552 
1553   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1554     if (flag & SOR_ZERO_INITIAL_GUESS) {
1555       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1556       its--;
1557     }
1558 
1559     while (its--) {
1560       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1561       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562 
1563       /* update rhs: bb1 = bb - B*x */
1564       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1565       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1566 
1567       /* local sweep */
1568       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1569     }
1570   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1571     if (flag & SOR_ZERO_INITIAL_GUESS) {
1572       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1573       its--;
1574     }
1575     while (its--) {
1576       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578 
1579       /* update rhs: bb1 = bb - B*x */
1580       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1581       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1582 
1583       /* local sweep */
1584       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1585     }
1586   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1587     if (flag & SOR_ZERO_INITIAL_GUESS) {
1588       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1589       its--;
1590     }
1591     while (its--) {
1592       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1594 
1595       /* update rhs: bb1 = bb - B*x */
1596       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1597       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1598 
1599       /* local sweep */
1600       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1601     }
1602   } else if (flag & SOR_EISENSTAT) {
1603     Vec xx1;
1604 
1605     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1607 
1608     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610     if (!mat->diag) {
1611       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1612       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1613     }
1614     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1615     if (hasop) {
1616       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1617     } else {
1618       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1619     }
1620     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1621 
1622     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1623 
1624     /* local sweep */
1625     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1626     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1627     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1628   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1629 
1630   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1631 
1632   matin->factorerrortype = mat->A->factorerrortype;
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1637 {
1638   Mat            aA,aB,Aperm;
1639   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1640   PetscScalar    *aa,*ba;
1641   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1642   PetscSF        rowsf,sf;
1643   IS             parcolp = NULL;
1644   PetscBool      done;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1649   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1650   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1651   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1652 
1653   /* Invert row permutation to find out where my rows should go */
1654   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1655   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1656   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1658   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1659   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1660 
1661   /* Invert column permutation to find out where my columns should go */
1662   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1663   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1664   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1665   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1666   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1667   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1668   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1669 
1670   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1671   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1672   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1673 
1674   /* Find out where my gcols should go */
1675   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1676   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1677   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1678   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1679   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1680   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1681   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1682   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1683 
1684   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1685   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1686   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     PetscInt    row = rdest[i];
1689     PetscMPIInt rowner;
1690     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1691     for (j=ai[i]; j<ai[i+1]; j++) {
1692       PetscInt    col = cdest[aj[j]];
1693       PetscMPIInt cowner;
1694       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1695       if (rowner == cowner) dnnz[i]++;
1696       else onnz[i]++;
1697     }
1698     for (j=bi[i]; j<bi[i+1]; j++) {
1699       PetscInt    col = gcdest[bj[j]];
1700       PetscMPIInt cowner;
1701       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1702       if (rowner == cowner) dnnz[i]++;
1703       else onnz[i]++;
1704     }
1705   }
1706   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1707   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1708   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1709   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1710   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1711 
1712   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1713   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1714   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1715   for (i=0; i<m; i++) {
1716     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1717     PetscInt j0,rowlen;
1718     rowlen = ai[i+1] - ai[i];
1719     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1720       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1721       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1722     }
1723     rowlen = bi[i+1] - bi[i];
1724     for (j0=j=0; j<rowlen; j0=j) {
1725       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1726       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1727     }
1728   }
1729   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1731   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1732   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1733   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1734   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1735   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1736   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1737   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1738   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1739   *B = Aperm;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1744 {
1745   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1746   PetscErrorCode ierr;
1747 
1748   PetscFunctionBegin;
1749   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1750   if (ghosts) *ghosts = aij->garray;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1755 {
1756   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1757   Mat            A    = mat->A,B = mat->B;
1758   PetscErrorCode ierr;
1759   PetscLogDouble isend[5],irecv[5];
1760 
1761   PetscFunctionBegin;
1762   info->block_size = 1.0;
1763   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1764 
1765   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1766   isend[3] = info->memory;  isend[4] = info->mallocs;
1767 
1768   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1769 
1770   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1771   isend[3] += info->memory;  isend[4] += info->mallocs;
1772   if (flag == MAT_LOCAL) {
1773     info->nz_used      = isend[0];
1774     info->nz_allocated = isend[1];
1775     info->nz_unneeded  = isend[2];
1776     info->memory       = isend[3];
1777     info->mallocs      = isend[4];
1778   } else if (flag == MAT_GLOBAL_MAX) {
1779     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1780 
1781     info->nz_used      = irecv[0];
1782     info->nz_allocated = irecv[1];
1783     info->nz_unneeded  = irecv[2];
1784     info->memory       = irecv[3];
1785     info->mallocs      = irecv[4];
1786   } else if (flag == MAT_GLOBAL_SUM) {
1787     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1788 
1789     info->nz_used      = irecv[0];
1790     info->nz_allocated = irecv[1];
1791     info->nz_unneeded  = irecv[2];
1792     info->memory       = irecv[3];
1793     info->mallocs      = irecv[4];
1794   }
1795   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1796   info->fill_ratio_needed = 0;
1797   info->factor_mallocs    = 0;
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1802 {
1803   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   switch (op) {
1808   case MAT_NEW_NONZERO_LOCATIONS:
1809   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1810   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1811   case MAT_KEEP_NONZERO_PATTERN:
1812   case MAT_NEW_NONZERO_LOCATION_ERR:
1813   case MAT_USE_INODES:
1814   case MAT_IGNORE_ZERO_ENTRIES:
1815     MatCheckPreallocated(A,1);
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_ROW_ORIENTED:
1820     MatCheckPreallocated(A,1);
1821     a->roworiented = flg;
1822 
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1825     break;
1826   case MAT_FORCE_DIAGONAL_ENTRIES:
1827   case MAT_SORTED_FULL:
1828     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1829     break;
1830   case MAT_IGNORE_OFF_PROC_ENTRIES:
1831     a->donotstash = flg;
1832     break;
1833   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1834   case MAT_SPD:
1835   case MAT_SYMMETRIC:
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837   case MAT_HERMITIAN:
1838   case MAT_SYMMETRY_ETERNAL:
1839     break;
1840   case MAT_SUBMAT_SINGLEIS:
1841     A->submat_singleis = flg;
1842     break;
1843   case MAT_STRUCTURE_ONLY:
1844     /* The option is handled directly by MatSetOption() */
1845     break;
1846   default:
1847     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) max = tmp;
1874     }
1875     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = NULL; pvB = NULL;}
1883   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1884   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = NULL;
1921       if (v)   *v   = NULL;
1922     }
1923   }
1924   *nz  = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1941 {
1942   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1943   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1944   PetscErrorCode ierr;
1945   PetscInt       i,j,cstart = mat->cmap->rstart;
1946   PetscReal      sum = 0.0;
1947   MatScalar      *v;
1948 
1949   PetscFunctionBegin;
1950   if (aij->size == 1) {
1951     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1952   } else {
1953     if (type == NORM_FROBENIUS) {
1954       v = amat->a;
1955       for (i=0; i<amat->nz; i++) {
1956         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1957       }
1958       v = bmat->a;
1959       for (i=0; i<bmat->nz; i++) {
1960         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1961       }
1962       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1963       *norm = PetscSqrtReal(*norm);
1964       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1965     } else if (type == NORM_1) { /* max column norm */
1966       PetscReal *tmp,*tmp2;
1967       PetscInt  *jj,*garray = aij->garray;
1968       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1969       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1970       *norm = 0.0;
1971       v     = amat->a; jj = amat->j;
1972       for (j=0; j<amat->nz; j++) {
1973         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1974       }
1975       v = bmat->a; jj = bmat->j;
1976       for (j=0; j<bmat->nz; j++) {
1977         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1978       }
1979       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1980       for (j=0; j<mat->cmap->N; j++) {
1981         if (tmp2[j] > *norm) *norm = tmp2[j];
1982       }
1983       ierr = PetscFree(tmp);CHKERRQ(ierr);
1984       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1985       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1986     } else if (type == NORM_INFINITY) { /* max row norm */
1987       PetscReal ntemp = 0.0;
1988       for (j=0; j<aij->A->rmap->n; j++) {
1989         v   = amat->a + amat->i[j];
1990         sum = 0.0;
1991         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1992           sum += PetscAbsScalar(*v); v++;
1993         }
1994         v = bmat->a + bmat->i[j];
1995         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1996           sum += PetscAbsScalar(*v); v++;
1997         }
1998         if (sum > ntemp) ntemp = sum;
1999       }
2000       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2001       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2002     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2008 {
2009   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2010   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2011   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2012   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2013   PetscErrorCode  ierr;
2014   Mat             B,A_diag,*B_diag;
2015   const MatScalar *array;
2016 
2017   PetscFunctionBegin;
2018   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2019   ai = Aloc->i; aj = Aloc->j;
2020   bi = Bloc->i; bj = Bloc->j;
2021   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2022     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2023     PetscSFNode          *oloc;
2024     PETSC_UNUSED PetscSF sf;
2025 
2026     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2027     /* compute d_nnz for preallocation */
2028     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2029     for (i=0; i<ai[ma]; i++) {
2030       d_nnz[aj[i]]++;
2031     }
2032     /* compute local off-diagonal contributions */
2033     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2034     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2035     /* map those to global */
2036     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2037     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2038     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2039     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2040     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2041     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2042     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2043 
2044     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2045     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2046     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2047     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2048     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2049     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2050   } else {
2051     B    = *matout;
2052     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2053   }
2054 
2055   b           = (Mat_MPIAIJ*)B->data;
2056   A_diag      = a->A;
2057   B_diag      = &b->A;
2058   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2059   A_diag_ncol = A_diag->cmap->N;
2060   B_diag_ilen = sub_B_diag->ilen;
2061   B_diag_i    = sub_B_diag->i;
2062 
2063   /* Set ilen for diagonal of B */
2064   for (i=0; i<A_diag_ncol; i++) {
2065     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2066   }
2067 
2068   /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2069   very quickly (=without using MatSetValues), because all writes are local. */
2070   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2071 
2072   /* copy over the B part */
2073   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2074   array = Bloc->a;
2075   row   = A->rmap->rstart;
2076   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2077   cols_tmp = cols;
2078   for (i=0; i<mb; i++) {
2079     ncol = bi[i+1]-bi[i];
2080     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2081     row++;
2082     array += ncol; cols_tmp += ncol;
2083   }
2084   ierr = PetscFree(cols);CHKERRQ(ierr);
2085 
2086   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2089     *matout = B;
2090   } else {
2091     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2092   }
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2097 {
2098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2099   Mat            a    = aij->A,b = aij->B;
2100   PetscErrorCode ierr;
2101   PetscInt       s1,s2,s3;
2102 
2103   PetscFunctionBegin;
2104   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2105   if (rr) {
2106     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2107     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2108     /* Overlap communication with computation. */
2109     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2110   }
2111   if (ll) {
2112     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2113     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2114     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2115   }
2116   /* scale  the diagonal block */
2117   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2118 
2119   if (rr) {
2120     /* Do a scatter end and then right scale the off-diagonal block */
2121     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2122     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2138 {
2139   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2140   Mat            a,b,c,d;
2141   PetscBool      flg;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   a = matA->A; b = matA->B;
2146   c = matB->A; d = matB->B;
2147 
2148   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2149   if (flg) {
2150     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2151   }
2152   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2157 {
2158   PetscErrorCode ierr;
2159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2160   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2161 
2162   PetscFunctionBegin;
2163   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2164   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2165     /* because of the column compression in the off-processor part of the matrix a->B,
2166        the number of columns in a->B and b->B may be different, hence we cannot call
2167        the MatCopy() directly on the two parts. If need be, we can provide a more
2168        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2169        then copying the submatrices */
2170     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2171   } else {
2172     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2173     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2174   }
2175   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2180 {
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 /*
2189    Computes the number of nonzeros per row needed for preallocation when X and Y
2190    have different nonzero structure.
2191 */
2192 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2193 {
2194   PetscInt       i,j,k,nzx,nzy;
2195 
2196   PetscFunctionBegin;
2197   /* Set the number of nonzeros in the new matrix */
2198   for (i=0; i<m; i++) {
2199     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2200     nzx = xi[i+1] - xi[i];
2201     nzy = yi[i+1] - yi[i];
2202     nnz[i] = 0;
2203     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2204       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2205       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2206       nnz[i]++;
2207     }
2208     for (; k<nzy; k++) nnz[i]++;
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2214 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2215 {
2216   PetscErrorCode ierr;
2217   PetscInt       m = Y->rmap->N;
2218   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2219   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2220 
2221   PetscFunctionBegin;
2222   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2227 {
2228   PetscErrorCode ierr;
2229   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2230   PetscBLASInt   bnz,one=1;
2231   Mat_SeqAIJ     *x,*y;
2232 
2233   PetscFunctionBegin;
2234   if (str == SAME_NONZERO_PATTERN) {
2235     PetscScalar alpha = a;
2236     x    = (Mat_SeqAIJ*)xx->A->data;
2237     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2238     y    = (Mat_SeqAIJ*)yy->A->data;
2239     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2240     x    = (Mat_SeqAIJ*)xx->B->data;
2241     y    = (Mat_SeqAIJ*)yy->B->data;
2242     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2243     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2244     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2245     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2246        will be updated */
2247 #if defined(PETSC_HAVE_DEVICE)
2248     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2249       Y->offloadmask = PETSC_OFFLOAD_CPU;
2250     }
2251 #endif
2252   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2253     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2254   } else {
2255     Mat      B;
2256     PetscInt *nnz_d,*nnz_o;
2257     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2258     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2259     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2260     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2261     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2262     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2263     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2264     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2265     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2266     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2267     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2268     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2269     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2275 
2276 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2277 {
2278 #if defined(PETSC_USE_COMPLEX)
2279   PetscErrorCode ierr;
2280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2281 
2282   PetscFunctionBegin;
2283   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2284   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2285 #else
2286   PetscFunctionBegin;
2287 #endif
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2298   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2303 {
2304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2309   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2314 {
2315   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2316   PetscErrorCode    ierr;
2317   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2318   PetscScalar       *va,*vv;
2319   Vec               vB,vA;
2320   const PetscScalar *vb;
2321 
2322   PetscFunctionBegin;
2323   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2324   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2325 
2326   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2327   if (idx) {
2328     for (i=0; i<m; i++) {
2329       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2330     }
2331   }
2332 
2333   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2334   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2335   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2336 
2337   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2338   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2339   for (i=0; i<m; i++) {
2340     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2341       vv[i] = vb[i];
2342       if (idx) idx[i] = a->garray[idxb[i]];
2343     } else {
2344       vv[i] = va[i];
2345       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2346         idx[i] = a->garray[idxb[i]];
2347     }
2348   }
2349   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2350   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2351   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2352   ierr = PetscFree(idxb);CHKERRQ(ierr);
2353   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2354   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2355   PetscFunctionReturn(0);
2356 }
2357 
2358 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2359 {
2360   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2361   PetscInt       m = A->rmap->n,n = A->cmap->n;
2362   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2363   PetscInt       *cmap  = mat->garray;
2364   PetscInt       *diagIdx, *offdiagIdx;
2365   Vec            diagV, offdiagV;
2366   PetscScalar    *a, *diagA, *offdiagA, *ba;
2367   PetscInt       r,j,col,ncols,*bi,*bj;
2368   PetscErrorCode ierr;
2369   Mat            B = mat->B;
2370   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2371 
2372   PetscFunctionBegin;
2373   /* When a process holds entire A and other processes have no entry */
2374   if (A->cmap->N == n) {
2375     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2376     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2377     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2378     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2380     PetscFunctionReturn(0);
2381   } else if (n == 0) {
2382     if (m) {
2383       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2384       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2385       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2386     }
2387     PetscFunctionReturn(0);
2388   }
2389 
2390   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2391   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2392   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2393   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2394 
2395   /* Get offdiagIdx[] for implicit 0.0 */
2396   ba = b->a;
2397   bi = b->i;
2398   bj = b->j;
2399   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2400   for (r = 0; r < m; r++) {
2401     ncols = bi[r+1] - bi[r];
2402     if (ncols == A->cmap->N - n) { /* Brow is dense */
2403       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2404     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2405       offdiagA[r] = 0.0;
2406 
2407       /* Find first hole in the cmap */
2408       for (j=0; j<ncols; j++) {
2409         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2410         if (col > j && j < cstart) {
2411           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2412           break;
2413         } else if (col > j + n && j >= cstart) {
2414           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2415           break;
2416         }
2417       }
2418       if (j == ncols && ncols < A->cmap->N - n) {
2419         /* a hole is outside compressed Bcols */
2420         if (ncols == 0) {
2421           if (cstart) {
2422             offdiagIdx[r] = 0;
2423           } else offdiagIdx[r] = cend;
2424         } else { /* ncols > 0 */
2425           offdiagIdx[r] = cmap[ncols-1] + 1;
2426           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2427         }
2428       }
2429     }
2430 
2431     for (j=0; j<ncols; j++) {
2432       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2433       ba++; bj++;
2434     }
2435   }
2436 
2437   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2438   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2439   for (r = 0; r < m; ++r) {
2440     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2441       a[r]   = diagA[r];
2442       if (idx) idx[r] = cstart + diagIdx[r];
2443     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2444       a[r] = diagA[r];
2445       if (idx) {
2446         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2447           idx[r] = cstart + diagIdx[r];
2448         } else idx[r] = offdiagIdx[r];
2449       }
2450     } else {
2451       a[r]   = offdiagA[r];
2452       if (idx) idx[r] = offdiagIdx[r];
2453     }
2454   }
2455   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2456   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2457   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2458   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2459   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2460   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2465 {
2466   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2467   PetscInt       m = A->rmap->n,n = A->cmap->n;
2468   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2469   PetscInt       *cmap  = mat->garray;
2470   PetscInt       *diagIdx, *offdiagIdx;
2471   Vec            diagV, offdiagV;
2472   PetscScalar    *a, *diagA, *offdiagA, *ba;
2473   PetscInt       r,j,col,ncols,*bi,*bj;
2474   PetscErrorCode ierr;
2475   Mat            B = mat->B;
2476   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2477 
2478   PetscFunctionBegin;
2479   /* When a process holds entire A and other processes have no entry */
2480   if (A->cmap->N == n) {
2481     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2482     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2483     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2484     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2485     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2486     PetscFunctionReturn(0);
2487   } else if (n == 0) {
2488     if (m) {
2489       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2490       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2491       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2492     }
2493     PetscFunctionReturn(0);
2494   }
2495 
2496   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2497   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2498   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2499   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2500 
2501   /* Get offdiagIdx[] for implicit 0.0 */
2502   ba = b->a;
2503   bi = b->i;
2504   bj = b->j;
2505   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   for (r = 0; r < m; r++) {
2507     ncols = bi[r+1] - bi[r];
2508     if (ncols == A->cmap->N - n) { /* Brow is dense */
2509       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2510     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2511       offdiagA[r] = 0.0;
2512 
2513       /* Find first hole in the cmap */
2514       for (j=0; j<ncols; j++) {
2515         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2516         if (col > j && j < cstart) {
2517           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2518           break;
2519         } else if (col > j + n && j >= cstart) {
2520           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2521           break;
2522         }
2523       }
2524       if (j == ncols && ncols < A->cmap->N - n) {
2525         /* a hole is outside compressed Bcols */
2526         if (ncols == 0) {
2527           if (cstart) {
2528             offdiagIdx[r] = 0;
2529           } else offdiagIdx[r] = cend;
2530         } else { /* ncols > 0 */
2531           offdiagIdx[r] = cmap[ncols-1] + 1;
2532           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2533         }
2534       }
2535     }
2536 
2537     for (j=0; j<ncols; j++) {
2538       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2539       ba++; bj++;
2540     }
2541   }
2542 
2543   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2544   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2545   for (r = 0; r < m; ++r) {
2546     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2547       a[r]   = diagA[r];
2548       if (idx) idx[r] = cstart + diagIdx[r];
2549     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2550       a[r] = diagA[r];
2551       if (idx) {
2552         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2553           idx[r] = cstart + diagIdx[r];
2554         } else idx[r] = offdiagIdx[r];
2555       }
2556     } else {
2557       a[r]   = offdiagA[r];
2558       if (idx) idx[r] = offdiagIdx[r];
2559     }
2560   }
2561   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2562   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2563   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2564   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2565   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2566   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2571 {
2572   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2573   PetscInt       m = A->rmap->n,n = A->cmap->n;
2574   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2575   PetscInt       *cmap  = mat->garray;
2576   PetscInt       *diagIdx, *offdiagIdx;
2577   Vec            diagV, offdiagV;
2578   PetscScalar    *a, *diagA, *offdiagA, *ba;
2579   PetscInt       r,j,col,ncols,*bi,*bj;
2580   PetscErrorCode ierr;
2581   Mat            B = mat->B;
2582   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2583 
2584   PetscFunctionBegin;
2585   /* When a process holds entire A and other processes have no entry */
2586   if (A->cmap->N == n) {
2587     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2588     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2589     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2590     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2591     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2592     PetscFunctionReturn(0);
2593   } else if (n == 0) {
2594     if (m) {
2595       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2596       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2597       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2598     }
2599     PetscFunctionReturn(0);
2600   }
2601 
2602   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2603   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2604   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2605   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2606 
2607   /* Get offdiagIdx[] for implicit 0.0 */
2608   ba = b->a;
2609   bi = b->i;
2610   bj = b->j;
2611   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2612   for (r = 0; r < m; r++) {
2613     ncols = bi[r+1] - bi[r];
2614     if (ncols == A->cmap->N - n) { /* Brow is dense */
2615       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2616     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2617       offdiagA[r] = 0.0;
2618 
2619       /* Find first hole in the cmap */
2620       for (j=0; j<ncols; j++) {
2621         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2622         if (col > j && j < cstart) {
2623           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2624           break;
2625         } else if (col > j + n && j >= cstart) {
2626           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2627           break;
2628         }
2629       }
2630       if (j == ncols && ncols < A->cmap->N - n) {
2631         /* a hole is outside compressed Bcols */
2632         if (ncols == 0) {
2633           if (cstart) {
2634             offdiagIdx[r] = 0;
2635           } else offdiagIdx[r] = cend;
2636         } else { /* ncols > 0 */
2637           offdiagIdx[r] = cmap[ncols-1] + 1;
2638           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2639         }
2640       }
2641     }
2642 
2643     for (j=0; j<ncols; j++) {
2644       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2645       ba++; bj++;
2646     }
2647   }
2648 
2649   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2650   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2651   for (r = 0; r < m; ++r) {
2652     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2653       a[r] = diagA[r];
2654       if (idx) idx[r] = cstart + diagIdx[r];
2655     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2656       a[r] = diagA[r];
2657       if (idx) {
2658         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2659           idx[r] = cstart + diagIdx[r];
2660         } else idx[r] = offdiagIdx[r];
2661       }
2662     } else {
2663       a[r] = offdiagA[r];
2664       if (idx) idx[r] = offdiagIdx[r];
2665     }
2666   }
2667   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2668   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2669   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2670   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2671   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2672   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2677 {
2678   PetscErrorCode ierr;
2679   Mat            *dummy;
2680 
2681   PetscFunctionBegin;
2682   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2683   *newmat = *dummy;
2684   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2689 {
2690   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2691   PetscErrorCode ierr;
2692 
2693   PetscFunctionBegin;
2694   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2695   A->factorerrortype = a->A->factorerrortype;
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2700 {
2701   PetscErrorCode ierr;
2702   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2703 
2704   PetscFunctionBegin;
2705   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2706   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2707   if (x->assembled) {
2708     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2709   } else {
2710     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2711   }
2712   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2713   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2718 {
2719   PetscFunctionBegin;
2720   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2721   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 /*@
2726    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2727 
2728    Collective on Mat
2729 
2730    Input Parameters:
2731 +    A - the matrix
2732 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2733 
2734  Level: advanced
2735 
2736 @*/
2737 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2738 {
2739   PetscErrorCode       ierr;
2740 
2741   PetscFunctionBegin;
2742   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2747 {
2748   PetscErrorCode       ierr;
2749   PetscBool            sc = PETSC_FALSE,flg;
2750 
2751   PetscFunctionBegin;
2752   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2753   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2754   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2755   if (flg) {
2756     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2757   }
2758   ierr = PetscOptionsTail();CHKERRQ(ierr);
2759   PetscFunctionReturn(0);
2760 }
2761 
2762 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2763 {
2764   PetscErrorCode ierr;
2765   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2766   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2767 
2768   PetscFunctionBegin;
2769   if (!Y->preallocated) {
2770     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2771   } else if (!aij->nz) {
2772     PetscInt nonew = aij->nonew;
2773     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2774     aij->nonew = nonew;
2775   }
2776   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2781 {
2782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2787   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2788   if (d) {
2789     PetscInt rstart;
2790     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2791     *d += rstart;
2792 
2793   }
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2798 {
2799   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2800   PetscErrorCode ierr;
2801 
2802   PetscFunctionBegin;
2803   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 /* -------------------------------------------------------------------*/
2808 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2809                                        MatGetRow_MPIAIJ,
2810                                        MatRestoreRow_MPIAIJ,
2811                                        MatMult_MPIAIJ,
2812                                 /* 4*/ MatMultAdd_MPIAIJ,
2813                                        MatMultTranspose_MPIAIJ,
2814                                        MatMultTransposeAdd_MPIAIJ,
2815                                        NULL,
2816                                        NULL,
2817                                        NULL,
2818                                 /*10*/ NULL,
2819                                        NULL,
2820                                        NULL,
2821                                        MatSOR_MPIAIJ,
2822                                        MatTranspose_MPIAIJ,
2823                                 /*15*/ MatGetInfo_MPIAIJ,
2824                                        MatEqual_MPIAIJ,
2825                                        MatGetDiagonal_MPIAIJ,
2826                                        MatDiagonalScale_MPIAIJ,
2827                                        MatNorm_MPIAIJ,
2828                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2829                                        MatAssemblyEnd_MPIAIJ,
2830                                        MatSetOption_MPIAIJ,
2831                                        MatZeroEntries_MPIAIJ,
2832                                 /*24*/ MatZeroRows_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                        NULL,
2837                                 /*29*/ MatSetUp_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        MatGetDiagonalBlock_MPIAIJ,
2841                                        NULL,
2842                                 /*34*/ MatDuplicate_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                 /*39*/ MatAXPY_MPIAIJ,
2848                                        MatCreateSubMatrices_MPIAIJ,
2849                                        MatIncreaseOverlap_MPIAIJ,
2850                                        MatGetValues_MPIAIJ,
2851                                        MatCopy_MPIAIJ,
2852                                 /*44*/ MatGetRowMax_MPIAIJ,
2853                                        MatScale_MPIAIJ,
2854                                        MatShift_MPIAIJ,
2855                                        MatDiagonalSet_MPIAIJ,
2856                                        MatZeroRowsColumns_MPIAIJ,
2857                                 /*49*/ MatSetRandom_MPIAIJ,
2858                                        NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2863                                        NULL,
2864                                        MatSetUnfactored_MPIAIJ,
2865                                        MatPermute_MPIAIJ,
2866                                        NULL,
2867                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2868                                        MatDestroy_MPIAIJ,
2869                                        MatView_MPIAIJ,
2870                                        NULL,
2871                                        NULL,
2872                                 /*64*/ NULL,
2873                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2874                                        NULL,
2875                                        NULL,
2876                                        NULL,
2877                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2878                                        MatGetRowMinAbs_MPIAIJ,
2879                                        NULL,
2880                                        NULL,
2881                                        NULL,
2882                                        NULL,
2883                                 /*75*/ MatFDColoringApply_AIJ,
2884                                        MatSetFromOptions_MPIAIJ,
2885                                        NULL,
2886                                        NULL,
2887                                        MatFindZeroDiagonals_MPIAIJ,
2888                                 /*80*/ NULL,
2889                                        NULL,
2890                                        NULL,
2891                                 /*83*/ MatLoad_MPIAIJ,
2892                                        MatIsSymmetric_MPIAIJ,
2893                                        NULL,
2894                                        NULL,
2895                                        NULL,
2896                                        NULL,
2897                                 /*89*/ NULL,
2898                                        NULL,
2899                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2900                                        NULL,
2901                                        NULL,
2902                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2903                                        NULL,
2904                                        NULL,
2905                                        NULL,
2906                                        MatBindToCPU_MPIAIJ,
2907                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2908                                        NULL,
2909                                        NULL,
2910                                        MatConjugate_MPIAIJ,
2911                                        NULL,
2912                                 /*104*/MatSetValuesRow_MPIAIJ,
2913                                        MatRealPart_MPIAIJ,
2914                                        MatImaginaryPart_MPIAIJ,
2915                                        NULL,
2916                                        NULL,
2917                                 /*109*/NULL,
2918                                        NULL,
2919                                        MatGetRowMin_MPIAIJ,
2920                                        NULL,
2921                                        MatMissingDiagonal_MPIAIJ,
2922                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2923                                        NULL,
2924                                        MatGetGhosts_MPIAIJ,
2925                                        NULL,
2926                                        NULL,
2927                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2928                                        NULL,
2929                                        NULL,
2930                                        NULL,
2931                                        MatGetMultiProcBlock_MPIAIJ,
2932                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2933                                        MatGetColumnNorms_MPIAIJ,
2934                                        MatInvertBlockDiagonal_MPIAIJ,
2935                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2936                                        MatCreateSubMatricesMPI_MPIAIJ,
2937                                 /*129*/NULL,
2938                                        NULL,
2939                                        NULL,
2940                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2941                                        NULL,
2942                                 /*134*/NULL,
2943                                        NULL,
2944                                        NULL,
2945                                        NULL,
2946                                        NULL,
2947                                 /*139*/MatSetBlockSizes_MPIAIJ,
2948                                        NULL,
2949                                        NULL,
2950                                        MatFDColoringSetUp_MPIXAIJ,
2951                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2952                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2953                                 /*145*/NULL,
2954                                        NULL,
2955                                        NULL
2956 };
2957 
2958 /* ----------------------------------------------------------------------------------------*/
2959 
2960 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2961 {
2962   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2963   PetscErrorCode ierr;
2964 
2965   PetscFunctionBegin;
2966   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2967   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2972 {
2973   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2978   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2983 {
2984   Mat_MPIAIJ     *b;
2985   PetscErrorCode ierr;
2986   PetscMPIInt    size;
2987 
2988   PetscFunctionBegin;
2989   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2990   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2991   b = (Mat_MPIAIJ*)B->data;
2992 
2993 #if defined(PETSC_USE_CTABLE)
2994   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2995 #else
2996   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2997 #endif
2998   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2999   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3000   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3001 
3002   /* Because the B will have been resized we simply destroy it and create a new one each time */
3003   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3004   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3005   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3006   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3007   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3008   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3009   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3010 
3011   if (!B->preallocated) {
3012     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3013     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3014     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3015     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3016     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3017   }
3018 
3019   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3020   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3021   B->preallocated  = PETSC_TRUE;
3022   B->was_assembled = PETSC_FALSE;
3023   B->assembled     = PETSC_FALSE;
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3028 {
3029   Mat_MPIAIJ     *b;
3030   PetscErrorCode ierr;
3031 
3032   PetscFunctionBegin;
3033   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3034   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3035   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3036   b = (Mat_MPIAIJ*)B->data;
3037 
3038 #if defined(PETSC_USE_CTABLE)
3039   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3040 #else
3041   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3042 #endif
3043   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3044   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3045   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3046 
3047   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3048   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3049   B->preallocated  = PETSC_TRUE;
3050   B->was_assembled = PETSC_FALSE;
3051   B->assembled = PETSC_FALSE;
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3056 {
3057   Mat            mat;
3058   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3059   PetscErrorCode ierr;
3060 
3061   PetscFunctionBegin;
3062   *newmat = NULL;
3063   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3064   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3065   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3066   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3067   a       = (Mat_MPIAIJ*)mat->data;
3068 
3069   mat->factortype   = matin->factortype;
3070   mat->assembled    = matin->assembled;
3071   mat->insertmode   = NOT_SET_VALUES;
3072   mat->preallocated = matin->preallocated;
3073 
3074   a->size         = oldmat->size;
3075   a->rank         = oldmat->rank;
3076   a->donotstash   = oldmat->donotstash;
3077   a->roworiented  = oldmat->roworiented;
3078   a->rowindices   = NULL;
3079   a->rowvalues    = NULL;
3080   a->getrowactive = PETSC_FALSE;
3081 
3082   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3083   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3084 
3085   if (oldmat->colmap) {
3086 #if defined(PETSC_USE_CTABLE)
3087     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3088 #else
3089     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3090     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3091     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3092 #endif
3093   } else a->colmap = NULL;
3094   if (oldmat->garray) {
3095     PetscInt len;
3096     len  = oldmat->B->cmap->n;
3097     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3098     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3099     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3100   } else a->garray = NULL;
3101 
3102   /* It may happen MatDuplicate is called with a non-assembled matrix
3103      In fact, MatDuplicate only requires the matrix to be preallocated
3104      This may happen inside a DMCreateMatrix_Shell */
3105   if (oldmat->lvec) {
3106     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3107     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3108   }
3109   if (oldmat->Mvctx) {
3110     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3111     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3112   }
3113   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3114   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3115   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3116   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3117   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3118   *newmat = mat;
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3123 {
3124   PetscBool      isbinary, ishdf5;
3125   PetscErrorCode ierr;
3126 
3127   PetscFunctionBegin;
3128   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3129   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3130   /* force binary viewer to load .info file if it has not yet done so */
3131   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3132   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3133   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3134   if (isbinary) {
3135     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3136   } else if (ishdf5) {
3137 #if defined(PETSC_HAVE_HDF5)
3138     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3139 #else
3140     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3141 #endif
3142   } else {
3143     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3144   }
3145   PetscFunctionReturn(0);
3146 }
3147 
3148 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3149 {
3150   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3151   PetscInt       *rowidxs,*colidxs;
3152   PetscScalar    *matvals;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3157 
3158   /* read in matrix header */
3159   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3160   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3161   M  = header[1]; N = header[2]; nz = header[3];
3162   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3163   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3164   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3165 
3166   /* set block sizes from the viewer's .info file */
3167   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3168   /* set global sizes if not set already */
3169   if (mat->rmap->N < 0) mat->rmap->N = M;
3170   if (mat->cmap->N < 0) mat->cmap->N = N;
3171   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3172   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3173 
3174   /* check if the matrix sizes are correct */
3175   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3176   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3177 
3178   /* read in row lengths and build row indices */
3179   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3180   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3181   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3182   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3183   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3184   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3185   /* read in column indices and matrix values */
3186   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3187   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3188   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3189   /* store matrix indices and values */
3190   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3191   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3192   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 /* Not scalable because of ISAllGather() unless getting all columns. */
3197 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3198 {
3199   PetscErrorCode ierr;
3200   IS             iscol_local;
3201   PetscBool      isstride;
3202   PetscMPIInt    lisstride=0,gisstride;
3203 
3204   PetscFunctionBegin;
3205   /* check if we are grabbing all columns*/
3206   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3207 
3208   if (isstride) {
3209     PetscInt  start,len,mstart,mlen;
3210     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3211     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3212     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3213     if (mstart == start && mlen-mstart == len) lisstride = 1;
3214   }
3215 
3216   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3217   if (gisstride) {
3218     PetscInt N;
3219     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3220     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3221     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3222     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3223   } else {
3224     PetscInt cbs;
3225     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3226     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3227     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3228   }
3229 
3230   *isseq = iscol_local;
3231   PetscFunctionReturn(0);
3232 }
3233 
3234 /*
3235  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3236  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3237 
3238  Input Parameters:
3239    mat - matrix
3240    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3241            i.e., mat->rstart <= isrow[i] < mat->rend
3242    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3243            i.e., mat->cstart <= iscol[i] < mat->cend
3244  Output Parameter:
3245    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3246    iscol_o - sequential column index set for retrieving mat->B
3247    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3248  */
3249 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3250 {
3251   PetscErrorCode ierr;
3252   Vec            x,cmap;
3253   const PetscInt *is_idx;
3254   PetscScalar    *xarray,*cmaparray;
3255   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3256   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3257   Mat            B=a->B;
3258   Vec            lvec=a->lvec,lcmap;
3259   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3260   MPI_Comm       comm;
3261   VecScatter     Mvctx=a->Mvctx;
3262 
3263   PetscFunctionBegin;
3264   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3265   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3266 
3267   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3268   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3269   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3270   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3271   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3272 
3273   /* Get start indices */
3274   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3275   isstart -= ncols;
3276   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3277 
3278   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3279   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3280   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3281   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3282   for (i=0; i<ncols; i++) {
3283     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3284     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3285     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3286   }
3287   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3288   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3289   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3290 
3291   /* Get iscol_d */
3292   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3293   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3294   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3295 
3296   /* Get isrow_d */
3297   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3298   rstart = mat->rmap->rstart;
3299   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3300   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3301   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3302   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3303 
3304   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3305   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3306   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3307 
3308   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3309   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3310   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3311 
3312   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3313 
3314   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3315   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3316 
3317   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3318   /* off-process column indices */
3319   count = 0;
3320   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3321   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3322 
3323   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3324   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3325   for (i=0; i<Bn; i++) {
3326     if (PetscRealPart(xarray[i]) > -1.0) {
3327       idx[count]     = i;                   /* local column index in off-diagonal part B */
3328       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3329       count++;
3330     }
3331   }
3332   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3333   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3334 
3335   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3336   /* cannot ensure iscol_o has same blocksize as iscol! */
3337 
3338   ierr = PetscFree(idx);CHKERRQ(ierr);
3339   *garray = cmap1;
3340 
3341   ierr = VecDestroy(&x);CHKERRQ(ierr);
3342   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3343   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3344   PetscFunctionReturn(0);
3345 }
3346 
3347 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3348 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3349 {
3350   PetscErrorCode ierr;
3351   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3352   Mat            M = NULL;
3353   MPI_Comm       comm;
3354   IS             iscol_d,isrow_d,iscol_o;
3355   Mat            Asub = NULL,Bsub = NULL;
3356   PetscInt       n;
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3360 
3361   if (call == MAT_REUSE_MATRIX) {
3362     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3363     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3364     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3365 
3366     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3367     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3368 
3369     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3370     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3371 
3372     /* Update diagonal and off-diagonal portions of submat */
3373     asub = (Mat_MPIAIJ*)(*submat)->data;
3374     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3375     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3376     if (n) {
3377       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3378     }
3379     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3380     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3381 
3382   } else { /* call == MAT_INITIAL_MATRIX) */
3383     const PetscInt *garray;
3384     PetscInt        BsubN;
3385 
3386     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3387     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3388 
3389     /* Create local submatrices Asub and Bsub */
3390     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3391     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3392 
3393     /* Create submatrix M */
3394     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3395 
3396     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3397     asub = (Mat_MPIAIJ*)M->data;
3398 
3399     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3400     n = asub->B->cmap->N;
3401     if (BsubN > n) {
3402       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3403       const PetscInt *idx;
3404       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3405       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3406 
3407       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3408       j = 0;
3409       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3410       for (i=0; i<n; i++) {
3411         if (j >= BsubN) break;
3412         while (subgarray[i] > garray[j]) j++;
3413 
3414         if (subgarray[i] == garray[j]) {
3415           idx_new[i] = idx[j++];
3416         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3417       }
3418       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3419 
3420       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3421       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3422 
3423     } else if (BsubN < n) {
3424       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3425     }
3426 
3427     ierr = PetscFree(garray);CHKERRQ(ierr);
3428     *submat = M;
3429 
3430     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3431     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3432     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3433 
3434     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3435     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3436 
3437     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3438     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3439   }
3440   PetscFunctionReturn(0);
3441 }
3442 
3443 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3444 {
3445   PetscErrorCode ierr;
3446   IS             iscol_local=NULL,isrow_d;
3447   PetscInt       csize;
3448   PetscInt       n,i,j,start,end;
3449   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3450   MPI_Comm       comm;
3451 
3452   PetscFunctionBegin;
3453   /* If isrow has same processor distribution as mat,
3454      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3455   if (call == MAT_REUSE_MATRIX) {
3456     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3457     if (isrow_d) {
3458       sameRowDist  = PETSC_TRUE;
3459       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3460     } else {
3461       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3462       if (iscol_local) {
3463         sameRowDist  = PETSC_TRUE;
3464         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3465       }
3466     }
3467   } else {
3468     /* Check if isrow has same processor distribution as mat */
3469     sameDist[0] = PETSC_FALSE;
3470     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3471     if (!n) {
3472       sameDist[0] = PETSC_TRUE;
3473     } else {
3474       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3475       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3476       if (i >= start && j < end) {
3477         sameDist[0] = PETSC_TRUE;
3478       }
3479     }
3480 
3481     /* Check if iscol has same processor distribution as mat */
3482     sameDist[1] = PETSC_FALSE;
3483     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3484     if (!n) {
3485       sameDist[1] = PETSC_TRUE;
3486     } else {
3487       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3488       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3489       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3490     }
3491 
3492     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3493     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3494     sameRowDist = tsameDist[0];
3495   }
3496 
3497   if (sameRowDist) {
3498     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3499       /* isrow and iscol have same processor distribution as mat */
3500       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3501       PetscFunctionReturn(0);
3502     } else { /* sameRowDist */
3503       /* isrow has same processor distribution as mat */
3504       if (call == MAT_INITIAL_MATRIX) {
3505         PetscBool sorted;
3506         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3507         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3508         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3509         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3510 
3511         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3512         if (sorted) {
3513           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3514           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3515           PetscFunctionReturn(0);
3516         }
3517       } else { /* call == MAT_REUSE_MATRIX */
3518         IS    iscol_sub;
3519         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3520         if (iscol_sub) {
3521           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3522           PetscFunctionReturn(0);
3523         }
3524       }
3525     }
3526   }
3527 
3528   /* General case: iscol -> iscol_local which has global size of iscol */
3529   if (call == MAT_REUSE_MATRIX) {
3530     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3531     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3532   } else {
3533     if (!iscol_local) {
3534       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3535     }
3536   }
3537 
3538   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3539   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3540 
3541   if (call == MAT_INITIAL_MATRIX) {
3542     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3543     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3544   }
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 /*@C
3549      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3550          and "off-diagonal" part of the matrix in CSR format.
3551 
3552    Collective
3553 
3554    Input Parameters:
3555 +  comm - MPI communicator
3556 .  A - "diagonal" portion of matrix
3557 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3558 -  garray - global index of B columns
3559 
3560    Output Parameter:
3561 .   mat - the matrix, with input A as its local diagonal matrix
3562    Level: advanced
3563 
3564    Notes:
3565        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3566        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3567 
3568 .seealso: MatCreateMPIAIJWithSplitArrays()
3569 @*/
3570 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3571 {
3572   PetscErrorCode ierr;
3573   Mat_MPIAIJ     *maij;
3574   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3575   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3576   PetscScalar    *oa=b->a;
3577   Mat            Bnew;
3578   PetscInt       m,n,N;
3579 
3580   PetscFunctionBegin;
3581   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3582   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3583   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3584   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3585   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3586   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3587 
3588   /* Get global columns of mat */
3589   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3590 
3591   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3592   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3593   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3594   maij = (Mat_MPIAIJ*)(*mat)->data;
3595 
3596   (*mat)->preallocated = PETSC_TRUE;
3597 
3598   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3599   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3600 
3601   /* Set A as diagonal portion of *mat */
3602   maij->A = A;
3603 
3604   nz = oi[m];
3605   for (i=0; i<nz; i++) {
3606     col   = oj[i];
3607     oj[i] = garray[col];
3608   }
3609 
3610    /* Set Bnew as off-diagonal portion of *mat */
3611   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3612   bnew        = (Mat_SeqAIJ*)Bnew->data;
3613   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3614   maij->B     = Bnew;
3615 
3616   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3617 
3618   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3619   b->free_a       = PETSC_FALSE;
3620   b->free_ij      = PETSC_FALSE;
3621   ierr = MatDestroy(&B);CHKERRQ(ierr);
3622 
3623   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3624   bnew->free_a       = PETSC_TRUE;
3625   bnew->free_ij      = PETSC_TRUE;
3626 
3627   /* condense columns of maij->B */
3628   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3629   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3632   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3633   PetscFunctionReturn(0);
3634 }
3635 
3636 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3637 
3638 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3639 {
3640   PetscErrorCode ierr;
3641   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3642   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3643   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3644   Mat            M,Msub,B=a->B;
3645   MatScalar      *aa;
3646   Mat_SeqAIJ     *aij;
3647   PetscInt       *garray = a->garray,*colsub,Ncols;
3648   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3649   IS             iscol_sub,iscmap;
3650   const PetscInt *is_idx,*cmap;
3651   PetscBool      allcolumns=PETSC_FALSE;
3652   MPI_Comm       comm;
3653 
3654   PetscFunctionBegin;
3655   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3656 
3657   if (call == MAT_REUSE_MATRIX) {
3658     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3659     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3660     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3661 
3662     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3663     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3664 
3665     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3666     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3667 
3668     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3669 
3670   } else { /* call == MAT_INITIAL_MATRIX) */
3671     PetscBool flg;
3672 
3673     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3674     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3675 
3676     /* (1) iscol -> nonscalable iscol_local */
3677     /* Check for special case: each processor gets entire matrix columns */
3678     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3679     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3680     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3681     if (allcolumns) {
3682       iscol_sub = iscol_local;
3683       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3684       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3685 
3686     } else {
3687       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3688       PetscInt *idx,*cmap1,k;
3689       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3690       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3691       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3692       count = 0;
3693       k     = 0;
3694       for (i=0; i<Ncols; i++) {
3695         j = is_idx[i];
3696         if (j >= cstart && j < cend) {
3697           /* diagonal part of mat */
3698           idx[count]     = j;
3699           cmap1[count++] = i; /* column index in submat */
3700         } else if (Bn) {
3701           /* off-diagonal part of mat */
3702           if (j == garray[k]) {
3703             idx[count]     = j;
3704             cmap1[count++] = i;  /* column index in submat */
3705           } else if (j > garray[k]) {
3706             while (j > garray[k] && k < Bn-1) k++;
3707             if (j == garray[k]) {
3708               idx[count]     = j;
3709               cmap1[count++] = i; /* column index in submat */
3710             }
3711           }
3712         }
3713       }
3714       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3715 
3716       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3717       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3718       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3719 
3720       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3721     }
3722 
3723     /* (3) Create sequential Msub */
3724     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3725   }
3726 
3727   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3728   aij  = (Mat_SeqAIJ*)(Msub)->data;
3729   ii   = aij->i;
3730   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3731 
3732   /*
3733       m - number of local rows
3734       Ncols - number of columns (same on all processors)
3735       rstart - first row in new global matrix generated
3736   */
3737   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3738 
3739   if (call == MAT_INITIAL_MATRIX) {
3740     /* (4) Create parallel newmat */
3741     PetscMPIInt    rank,size;
3742     PetscInt       csize;
3743 
3744     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3745     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3746 
3747     /*
3748         Determine the number of non-zeros in the diagonal and off-diagonal
3749         portions of the matrix in order to do correct preallocation
3750     */
3751 
3752     /* first get start and end of "diagonal" columns */
3753     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3754     if (csize == PETSC_DECIDE) {
3755       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3756       if (mglobal == Ncols) { /* square matrix */
3757         nlocal = m;
3758       } else {
3759         nlocal = Ncols/size + ((Ncols % size) > rank);
3760       }
3761     } else {
3762       nlocal = csize;
3763     }
3764     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3765     rstart = rend - nlocal;
3766     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3767 
3768     /* next, compute all the lengths */
3769     jj    = aij->j;
3770     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3771     olens = dlens + m;
3772     for (i=0; i<m; i++) {
3773       jend = ii[i+1] - ii[i];
3774       olen = 0;
3775       dlen = 0;
3776       for (j=0; j<jend; j++) {
3777         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3778         else dlen++;
3779         jj++;
3780       }
3781       olens[i] = olen;
3782       dlens[i] = dlen;
3783     }
3784 
3785     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3786     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3787 
3788     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3789     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3790     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3791     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3792     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3793     ierr = PetscFree(dlens);CHKERRQ(ierr);
3794 
3795   } else { /* call == MAT_REUSE_MATRIX */
3796     M    = *newmat;
3797     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3798     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3799     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3800     /*
3801          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3802        rather than the slower MatSetValues().
3803     */
3804     M->was_assembled = PETSC_TRUE;
3805     M->assembled     = PETSC_FALSE;
3806   }
3807 
3808   /* (5) Set values of Msub to *newmat */
3809   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3810   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3811 
3812   jj   = aij->j;
3813   aa   = aij->a;
3814   for (i=0; i<m; i++) {
3815     row = rstart + i;
3816     nz  = ii[i+1] - ii[i];
3817     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3818     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3819     jj += nz; aa += nz;
3820   }
3821   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3822 
3823   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3824   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3825 
3826   ierr = PetscFree(colsub);CHKERRQ(ierr);
3827 
3828   /* save Msub, iscol_sub and iscmap used in processor for next request */
3829   if (call ==  MAT_INITIAL_MATRIX) {
3830     *newmat = M;
3831     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3832     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3833 
3834     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3835     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3836 
3837     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3838     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3839 
3840     if (iscol_local) {
3841       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3842       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3843     }
3844   }
3845   PetscFunctionReturn(0);
3846 }
3847 
3848 /*
3849     Not great since it makes two copies of the submatrix, first an SeqAIJ
3850   in local and then by concatenating the local matrices the end result.
3851   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3852 
3853   Note: This requires a sequential iscol with all indices.
3854 */
3855 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3856 {
3857   PetscErrorCode ierr;
3858   PetscMPIInt    rank,size;
3859   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3860   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3861   Mat            M,Mreuse;
3862   MatScalar      *aa,*vwork;
3863   MPI_Comm       comm;
3864   Mat_SeqAIJ     *aij;
3865   PetscBool      colflag,allcolumns=PETSC_FALSE;
3866 
3867   PetscFunctionBegin;
3868   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3871 
3872   /* Check for special case: each processor gets entire matrix columns */
3873   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3874   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3875   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3876   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3877 
3878   if (call ==  MAT_REUSE_MATRIX) {
3879     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3880     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3881     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3882   } else {
3883     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3884   }
3885 
3886   /*
3887       m - number of local rows
3888       n - number of columns (same on all processors)
3889       rstart - first row in new global matrix generated
3890   */
3891   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3892   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3893   if (call == MAT_INITIAL_MATRIX) {
3894     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3895     ii  = aij->i;
3896     jj  = aij->j;
3897 
3898     /*
3899         Determine the number of non-zeros in the diagonal and off-diagonal
3900         portions of the matrix in order to do correct preallocation
3901     */
3902 
3903     /* first get start and end of "diagonal" columns */
3904     if (csize == PETSC_DECIDE) {
3905       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3906       if (mglobal == n) { /* square matrix */
3907         nlocal = m;
3908       } else {
3909         nlocal = n/size + ((n % size) > rank);
3910       }
3911     } else {
3912       nlocal = csize;
3913     }
3914     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3915     rstart = rend - nlocal;
3916     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3917 
3918     /* next, compute all the lengths */
3919     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3920     olens = dlens + m;
3921     for (i=0; i<m; i++) {
3922       jend = ii[i+1] - ii[i];
3923       olen = 0;
3924       dlen = 0;
3925       for (j=0; j<jend; j++) {
3926         if (*jj < rstart || *jj >= rend) olen++;
3927         else dlen++;
3928         jj++;
3929       }
3930       olens[i] = olen;
3931       dlens[i] = dlen;
3932     }
3933     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3934     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3935     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3936     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3937     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3938     ierr = PetscFree(dlens);CHKERRQ(ierr);
3939   } else {
3940     PetscInt ml,nl;
3941 
3942     M    = *newmat;
3943     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3944     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3945     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3946     /*
3947          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3948        rather than the slower MatSetValues().
3949     */
3950     M->was_assembled = PETSC_TRUE;
3951     M->assembled     = PETSC_FALSE;
3952   }
3953   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3954   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3955   ii   = aij->i;
3956   jj   = aij->j;
3957   aa   = aij->a;
3958   for (i=0; i<m; i++) {
3959     row   = rstart + i;
3960     nz    = ii[i+1] - ii[i];
3961     cwork = jj;     jj += nz;
3962     vwork = aa;     aa += nz;
3963     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3964   }
3965 
3966   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3968   *newmat = M;
3969 
3970   /* save submatrix used in processor for next request */
3971   if (call ==  MAT_INITIAL_MATRIX) {
3972     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3973     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3974   }
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3979 {
3980   PetscInt       m,cstart, cend,j,nnz,i,d;
3981   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3982   const PetscInt *JJ;
3983   PetscErrorCode ierr;
3984   PetscBool      nooffprocentries;
3985 
3986   PetscFunctionBegin;
3987   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3988 
3989   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3990   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3991   m      = B->rmap->n;
3992   cstart = B->cmap->rstart;
3993   cend   = B->cmap->rend;
3994   rstart = B->rmap->rstart;
3995 
3996   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3997 
3998   if (PetscDefined(USE_DEBUG)) {
3999     for (i=0; i<m; i++) {
4000       nnz = Ii[i+1]- Ii[i];
4001       JJ  = J + Ii[i];
4002       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4003       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
4004       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4005     }
4006   }
4007 
4008   for (i=0; i<m; i++) {
4009     nnz     = Ii[i+1]- Ii[i];
4010     JJ      = J + Ii[i];
4011     nnz_max = PetscMax(nnz_max,nnz);
4012     d       = 0;
4013     for (j=0; j<nnz; j++) {
4014       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4015     }
4016     d_nnz[i] = d;
4017     o_nnz[i] = nnz - d;
4018   }
4019   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4020   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4021 
4022   for (i=0; i<m; i++) {
4023     ii   = i + rstart;
4024     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4025   }
4026   nooffprocentries    = B->nooffprocentries;
4027   B->nooffprocentries = PETSC_TRUE;
4028   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4029   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4030   B->nooffprocentries = nooffprocentries;
4031 
4032   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 /*@
4037    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4038    (the default parallel PETSc format).
4039 
4040    Collective
4041 
4042    Input Parameters:
4043 +  B - the matrix
4044 .  i - the indices into j for the start of each local row (starts with zero)
4045 .  j - the column indices for each local row (starts with zero)
4046 -  v - optional values in the matrix
4047 
4048    Level: developer
4049 
4050    Notes:
4051        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4052      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4053      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4054 
4055        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4056 
4057        The format which is used for the sparse matrix input, is equivalent to a
4058     row-major ordering.. i.e for the following matrix, the input data expected is
4059     as shown
4060 
4061 $        1 0 0
4062 $        2 0 3     P0
4063 $       -------
4064 $        4 5 6     P1
4065 $
4066 $     Process0 [P0]: rows_owned=[0,1]
4067 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4068 $        j =  {0,0,2}  [size = 3]
4069 $        v =  {1,2,3}  [size = 3]
4070 $
4071 $     Process1 [P1]: rows_owned=[2]
4072 $        i =  {0,3}    [size = nrow+1  = 1+1]
4073 $        j =  {0,1,2}  [size = 3]
4074 $        v =  {4,5,6}  [size = 3]
4075 
4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4077           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4078 @*/
4079 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4080 {
4081   PetscErrorCode ierr;
4082 
4083   PetscFunctionBegin;
4084   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4085   PetscFunctionReturn(0);
4086 }
4087 
4088 /*@C
4089    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4090    (the default parallel PETSc format).  For good matrix assembly performance
4091    the user should preallocate the matrix storage by setting the parameters
4092    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4093    performance can be increased by more than a factor of 50.
4094 
4095    Collective
4096 
4097    Input Parameters:
4098 +  B - the matrix
4099 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4100            (same value is used for all local rows)
4101 .  d_nnz - array containing the number of nonzeros in the various rows of the
4102            DIAGONAL portion of the local submatrix (possibly different for each row)
4103            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4104            The size of this array is equal to the number of local rows, i.e 'm'.
4105            For matrices that will be factored, you must leave room for (and set)
4106            the diagonal entry even if it is zero.
4107 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4108            submatrix (same value is used for all local rows).
4109 -  o_nnz - array containing the number of nonzeros in the various rows of the
4110            OFF-DIAGONAL portion of the local submatrix (possibly different for
4111            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4112            structure. The size of this array is equal to the number
4113            of local rows, i.e 'm'.
4114 
4115    If the *_nnz parameter is given then the *_nz parameter is ignored
4116 
4117    The AIJ format (also called the Yale sparse matrix format or
4118    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4119    storage.  The stored row and column indices begin with zero.
4120    See Users-Manual: ch_mat for details.
4121 
4122    The parallel matrix is partitioned such that the first m0 rows belong to
4123    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4124    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4125 
4126    The DIAGONAL portion of the local submatrix of a processor can be defined
4127    as the submatrix which is obtained by extraction the part corresponding to
4128    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4129    first row that belongs to the processor, r2 is the last row belonging to
4130    the this processor, and c1-c2 is range of indices of the local part of a
4131    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4132    common case of a square matrix, the row and column ranges are the same and
4133    the DIAGONAL part is also square. The remaining portion of the local
4134    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4135 
4136    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4137 
4138    You can call MatGetInfo() to get information on how effective the preallocation was;
4139    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4140    You can also run with the option -info and look for messages with the string
4141    malloc in them to see if additional memory allocation was needed.
4142 
4143    Example usage:
4144 
4145    Consider the following 8x8 matrix with 34 non-zero values, that is
4146    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4147    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4148    as follows:
4149 
4150 .vb
4151             1  2  0  |  0  3  0  |  0  4
4152     Proc0   0  5  6  |  7  0  0  |  8  0
4153             9  0 10  | 11  0  0  | 12  0
4154     -------------------------------------
4155            13  0 14  | 15 16 17  |  0  0
4156     Proc1   0 18  0  | 19 20 21  |  0  0
4157             0  0  0  | 22 23  0  | 24  0
4158     -------------------------------------
4159     Proc2  25 26 27  |  0  0 28  | 29  0
4160            30  0  0  | 31 32 33  |  0 34
4161 .ve
4162 
4163    This can be represented as a collection of submatrices as:
4164 
4165 .vb
4166       A B C
4167       D E F
4168       G H I
4169 .ve
4170 
4171    Where the submatrices A,B,C are owned by proc0, D,E,F are
4172    owned by proc1, G,H,I are owned by proc2.
4173 
4174    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4175    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4176    The 'M','N' parameters are 8,8, and have the same values on all procs.
4177 
4178    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4179    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4180    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4181    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4182    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4183    matrix, ans [DF] as another SeqAIJ matrix.
4184 
4185    When d_nz, o_nz parameters are specified, d_nz storage elements are
4186    allocated for every row of the local diagonal submatrix, and o_nz
4187    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4188    One way to choose d_nz and o_nz is to use the max nonzerors per local
4189    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4190    In this case, the values of d_nz,o_nz are:
4191 .vb
4192      proc0 : dnz = 2, o_nz = 2
4193      proc1 : dnz = 3, o_nz = 2
4194      proc2 : dnz = 1, o_nz = 4
4195 .ve
4196    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4197    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4198    for proc3. i.e we are using 12+15+10=37 storage locations to store
4199    34 values.
4200 
4201    When d_nnz, o_nnz parameters are specified, the storage is specified
4202    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4203    In the above case the values for d_nnz,o_nnz are:
4204 .vb
4205      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4206      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4207      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4208 .ve
4209    Here the space allocated is sum of all the above values i.e 34, and
4210    hence pre-allocation is perfect.
4211 
4212    Level: intermediate
4213 
4214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4215           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4216 @*/
4217 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4218 {
4219   PetscErrorCode ierr;
4220 
4221   PetscFunctionBegin;
4222   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4223   PetscValidType(B,1);
4224   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4225   PetscFunctionReturn(0);
4226 }
4227 
4228 /*@
4229      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4230          CSR format for the local rows.
4231 
4232    Collective
4233 
4234    Input Parameters:
4235 +  comm - MPI communicator
4236 .  m - number of local rows (Cannot be PETSC_DECIDE)
4237 .  n - This value should be the same as the local size used in creating the
4238        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4239        calculated if N is given) For square matrices n is almost always m.
4240 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4241 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4242 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4243 .   j - column indices
4244 -   a - matrix values
4245 
4246    Output Parameter:
4247 .   mat - the matrix
4248 
4249    Level: intermediate
4250 
4251    Notes:
4252        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4253      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4254      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4255 
4256        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4257 
4258        The format which is used for the sparse matrix input, is equivalent to a
4259     row-major ordering.. i.e for the following matrix, the input data expected is
4260     as shown
4261 
4262        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4263 
4264 $        1 0 0
4265 $        2 0 3     P0
4266 $       -------
4267 $        4 5 6     P1
4268 $
4269 $     Process0 [P0]: rows_owned=[0,1]
4270 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4271 $        j =  {0,0,2}  [size = 3]
4272 $        v =  {1,2,3}  [size = 3]
4273 $
4274 $     Process1 [P1]: rows_owned=[2]
4275 $        i =  {0,3}    [size = nrow+1  = 1+1]
4276 $        j =  {0,1,2}  [size = 3]
4277 $        v =  {4,5,6}  [size = 3]
4278 
4279 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4280           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4281 @*/
4282 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4283 {
4284   PetscErrorCode ierr;
4285 
4286   PetscFunctionBegin;
4287   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4288   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4289   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4290   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4291   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4292   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4293   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4294   PetscFunctionReturn(0);
4295 }
4296 
4297 /*@
4298      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4299          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4300 
4301    Collective
4302 
4303    Input Parameters:
4304 +  mat - the matrix
4305 .  m - number of local rows (Cannot be PETSC_DECIDE)
4306 .  n - This value should be the same as the local size used in creating the
4307        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4308        calculated if N is given) For square matrices n is almost always m.
4309 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4310 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4311 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4312 .  J - column indices
4313 -  v - matrix values
4314 
4315    Level: intermediate
4316 
4317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4318           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4319 @*/
4320 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4321 {
4322   PetscErrorCode ierr;
4323   PetscInt       cstart,nnz,i,j;
4324   PetscInt       *ld;
4325   PetscBool      nooffprocentries;
4326   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4327   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4328   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4329   const PetscInt *Adi = Ad->i;
4330   PetscInt       ldi,Iii,md;
4331 
4332   PetscFunctionBegin;
4333   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4334   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4335   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4336   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4337 
4338   cstart = mat->cmap->rstart;
4339   if (!Aij->ld) {
4340     /* count number of entries below block diagonal */
4341     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4342     Aij->ld = ld;
4343     for (i=0; i<m; i++) {
4344       nnz  = Ii[i+1]- Ii[i];
4345       j     = 0;
4346       while  (J[j] < cstart && j < nnz) {j++;}
4347       J    += nnz;
4348       ld[i] = j;
4349     }
4350   } else {
4351     ld = Aij->ld;
4352   }
4353 
4354   for (i=0; i<m; i++) {
4355     nnz  = Ii[i+1]- Ii[i];
4356     Iii  = Ii[i];
4357     ldi  = ld[i];
4358     md   = Adi[i+1]-Adi[i];
4359     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4360     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4361     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4362     ad  += md;
4363     ao  += nnz - md;
4364   }
4365   nooffprocentries      = mat->nooffprocentries;
4366   mat->nooffprocentries = PETSC_TRUE;
4367   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4368   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4369   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4370   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4371   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4372   mat->nooffprocentries = nooffprocentries;
4373   PetscFunctionReturn(0);
4374 }
4375 
4376 /*@C
4377    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4378    (the default parallel PETSc format).  For good matrix assembly performance
4379    the user should preallocate the matrix storage by setting the parameters
4380    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4381    performance can be increased by more than a factor of 50.
4382 
4383    Collective
4384 
4385    Input Parameters:
4386 +  comm - MPI communicator
4387 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4388            This value should be the same as the local size used in creating the
4389            y vector for the matrix-vector product y = Ax.
4390 .  n - This value should be the same as the local size used in creating the
4391        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4392        calculated if N is given) For square matrices n is almost always m.
4393 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4394 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4395 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4396            (same value is used for all local rows)
4397 .  d_nnz - array containing the number of nonzeros in the various rows of the
4398            DIAGONAL portion of the local submatrix (possibly different for each row)
4399            or NULL, if d_nz is used to specify the nonzero structure.
4400            The size of this array is equal to the number of local rows, i.e 'm'.
4401 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4402            submatrix (same value is used for all local rows).
4403 -  o_nnz - array containing the number of nonzeros in the various rows of the
4404            OFF-DIAGONAL portion of the local submatrix (possibly different for
4405            each row) or NULL, if o_nz is used to specify the nonzero
4406            structure. The size of this array is equal to the number
4407            of local rows, i.e 'm'.
4408 
4409    Output Parameter:
4410 .  A - the matrix
4411 
4412    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4413    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4414    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4415 
4416    Notes:
4417    If the *_nnz parameter is given then the *_nz parameter is ignored
4418 
4419    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4420    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4421    storage requirements for this matrix.
4422 
4423    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4424    processor than it must be used on all processors that share the object for
4425    that argument.
4426 
4427    The user MUST specify either the local or global matrix dimensions
4428    (possibly both).
4429 
4430    The parallel matrix is partitioned across processors such that the
4431    first m0 rows belong to process 0, the next m1 rows belong to
4432    process 1, the next m2 rows belong to process 2 etc.. where
4433    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4434    values corresponding to [m x N] submatrix.
4435 
4436    The columns are logically partitioned with the n0 columns belonging
4437    to 0th partition, the next n1 columns belonging to the next
4438    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4439 
4440    The DIAGONAL portion of the local submatrix on any given processor
4441    is the submatrix corresponding to the rows and columns m,n
4442    corresponding to the given processor. i.e diagonal matrix on
4443    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4444    etc. The remaining portion of the local submatrix [m x (N-n)]
4445    constitute the OFF-DIAGONAL portion. The example below better
4446    illustrates this concept.
4447 
4448    For a square global matrix we define each processor's diagonal portion
4449    to be its local rows and the corresponding columns (a square submatrix);
4450    each processor's off-diagonal portion encompasses the remainder of the
4451    local matrix (a rectangular submatrix).
4452 
4453    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4454 
4455    When calling this routine with a single process communicator, a matrix of
4456    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4457    type of communicator, use the construction mechanism
4458 .vb
4459      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4460 .ve
4461 
4462 $     MatCreate(...,&A);
4463 $     MatSetType(A,MATMPIAIJ);
4464 $     MatSetSizes(A, m,n,M,N);
4465 $     MatMPIAIJSetPreallocation(A,...);
4466 
4467    By default, this format uses inodes (identical nodes) when possible.
4468    We search for consecutive rows with the same nonzero structure, thereby
4469    reusing matrix information to achieve increased efficiency.
4470 
4471    Options Database Keys:
4472 +  -mat_no_inode  - Do not use inodes
4473 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4474 
4475 
4476 
4477    Example usage:
4478 
4479    Consider the following 8x8 matrix with 34 non-zero values, that is
4480    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4481    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4482    as follows
4483 
4484 .vb
4485             1  2  0  |  0  3  0  |  0  4
4486     Proc0   0  5  6  |  7  0  0  |  8  0
4487             9  0 10  | 11  0  0  | 12  0
4488     -------------------------------------
4489            13  0 14  | 15 16 17  |  0  0
4490     Proc1   0 18  0  | 19 20 21  |  0  0
4491             0  0  0  | 22 23  0  | 24  0
4492     -------------------------------------
4493     Proc2  25 26 27  |  0  0 28  | 29  0
4494            30  0  0  | 31 32 33  |  0 34
4495 .ve
4496 
4497    This can be represented as a collection of submatrices as
4498 
4499 .vb
4500       A B C
4501       D E F
4502       G H I
4503 .ve
4504 
4505    Where the submatrices A,B,C are owned by proc0, D,E,F are
4506    owned by proc1, G,H,I are owned by proc2.
4507 
4508    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4509    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4510    The 'M','N' parameters are 8,8, and have the same values on all procs.
4511 
4512    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4513    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4514    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4515    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4516    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4517    matrix, ans [DF] as another SeqAIJ matrix.
4518 
4519    When d_nz, o_nz parameters are specified, d_nz storage elements are
4520    allocated for every row of the local diagonal submatrix, and o_nz
4521    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4522    One way to choose d_nz and o_nz is to use the max nonzerors per local
4523    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4524    In this case, the values of d_nz,o_nz are
4525 .vb
4526      proc0 : dnz = 2, o_nz = 2
4527      proc1 : dnz = 3, o_nz = 2
4528      proc2 : dnz = 1, o_nz = 4
4529 .ve
4530    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4531    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4532    for proc3. i.e we are using 12+15+10=37 storage locations to store
4533    34 values.
4534 
4535    When d_nnz, o_nnz parameters are specified, the storage is specified
4536    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4537    In the above case the values for d_nnz,o_nnz are
4538 .vb
4539      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4540      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4541      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4542 .ve
4543    Here the space allocated is sum of all the above values i.e 34, and
4544    hence pre-allocation is perfect.
4545 
4546    Level: intermediate
4547 
4548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4549           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4550 @*/
4551 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4552 {
4553   PetscErrorCode ierr;
4554   PetscMPIInt    size;
4555 
4556   PetscFunctionBegin;
4557   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4558   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4559   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4560   if (size > 1) {
4561     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4562     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4563   } else {
4564     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4565     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4566   }
4567   PetscFunctionReturn(0);
4568 }
4569 
4570 /*@C
4571   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4572 
4573   Not collective
4574 
4575   Input Parameter:
4576 . A - The MPIAIJ matrix
4577 
4578   Output Parameters:
4579 + Ad - The local diagonal block as a SeqAIJ matrix
4580 . Ao - The local off-diagonal block as a SeqAIJ matrix
4581 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4582 
4583   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4584   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4585   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4586   local column numbers to global column numbers in the original matrix.
4587 
4588   Level: intermediate
4589 
4590 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4591 @*/
4592 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4593 {
4594   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4595   PetscBool      flg;
4596   PetscErrorCode ierr;
4597 
4598   PetscFunctionBegin;
4599   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4600   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4601   if (Ad)     *Ad     = a->A;
4602   if (Ao)     *Ao     = a->B;
4603   if (colmap) *colmap = a->garray;
4604   PetscFunctionReturn(0);
4605 }
4606 
4607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4608 {
4609   PetscErrorCode ierr;
4610   PetscInt       m,N,i,rstart,nnz,Ii;
4611   PetscInt       *indx;
4612   PetscScalar    *values;
4613 
4614   PetscFunctionBegin;
4615   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4616   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4617     PetscInt       *dnz,*onz,sum,bs,cbs;
4618 
4619     if (n == PETSC_DECIDE) {
4620       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4621     }
4622     /* Check sum(n) = N */
4623     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4624     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4625 
4626     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4627     rstart -= m;
4628 
4629     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4630     for (i=0; i<m; i++) {
4631       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4632       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4633       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4634     }
4635 
4636     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4637     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4638     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4639     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4640     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4641     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4642     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4643     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4644   }
4645 
4646   /* numeric phase */
4647   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4648   for (i=0; i<m; i++) {
4649     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4650     Ii   = i + rstart;
4651     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4652     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4653   }
4654   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4655   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4660 {
4661   PetscErrorCode    ierr;
4662   PetscMPIInt       rank;
4663   PetscInt          m,N,i,rstart,nnz;
4664   size_t            len;
4665   const PetscInt    *indx;
4666   PetscViewer       out;
4667   char              *name;
4668   Mat               B;
4669   const PetscScalar *values;
4670 
4671   PetscFunctionBegin;
4672   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4673   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4674   /* Should this be the type of the diagonal block of A? */
4675   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4676   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4677   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4678   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4679   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4680   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4681   for (i=0; i<m; i++) {
4682     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4683     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4684     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4685   }
4686   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4688 
4689   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4690   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4691   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4692   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4693   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4694   ierr = PetscFree(name);CHKERRQ(ierr);
4695   ierr = MatView(B,out);CHKERRQ(ierr);
4696   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4697   ierr = MatDestroy(&B);CHKERRQ(ierr);
4698   PetscFunctionReturn(0);
4699 }
4700 
4701 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4702 {
4703   PetscErrorCode      ierr;
4704   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4705 
4706   PetscFunctionBegin;
4707   if (!merge) PetscFunctionReturn(0);
4708   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4709   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4710   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4711   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4712   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4713   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4714   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4715   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4716   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4717   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4718   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4719   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4720   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4721   ierr = PetscFree(merge);CHKERRQ(ierr);
4722   PetscFunctionReturn(0);
4723 }
4724 
4725 #include <../src/mat/utils/freespace.h>
4726 #include <petscbt.h>
4727 
4728 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4729 {
4730   PetscErrorCode      ierr;
4731   MPI_Comm            comm;
4732   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4733   PetscMPIInt         size,rank,taga,*len_s;
4734   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4735   PetscInt            proc,m;
4736   PetscInt            **buf_ri,**buf_rj;
4737   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4738   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4739   MPI_Request         *s_waits,*r_waits;
4740   MPI_Status          *status;
4741   MatScalar           *aa=a->a;
4742   MatScalar           **abuf_r,*ba_i;
4743   Mat_Merge_SeqsToMPI *merge;
4744   PetscContainer      container;
4745 
4746   PetscFunctionBegin;
4747   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4748   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4749 
4750   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4751   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4752 
4753   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4754   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4755   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4756 
4757   bi     = merge->bi;
4758   bj     = merge->bj;
4759   buf_ri = merge->buf_ri;
4760   buf_rj = merge->buf_rj;
4761 
4762   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4763   owners = merge->rowmap->range;
4764   len_s  = merge->len_s;
4765 
4766   /* send and recv matrix values */
4767   /*-----------------------------*/
4768   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4769   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4770 
4771   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4772   for (proc=0,k=0; proc<size; proc++) {
4773     if (!len_s[proc]) continue;
4774     i    = owners[proc];
4775     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4776     k++;
4777   }
4778 
4779   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4780   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4781   ierr = PetscFree(status);CHKERRQ(ierr);
4782 
4783   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4784   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4785 
4786   /* insert mat values of mpimat */
4787   /*----------------------------*/
4788   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4789   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4790 
4791   for (k=0; k<merge->nrecv; k++) {
4792     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4793     nrows       = *(buf_ri_k[k]);
4794     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4795     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4796   }
4797 
4798   /* set values of ba */
4799   m = merge->rowmap->n;
4800   for (i=0; i<m; i++) {
4801     arow = owners[rank] + i;
4802     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4803     bnzi = bi[i+1] - bi[i];
4804     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4805 
4806     /* add local non-zero vals of this proc's seqmat into ba */
4807     anzi   = ai[arow+1] - ai[arow];
4808     aj     = a->j + ai[arow];
4809     aa     = a->a + ai[arow];
4810     nextaj = 0;
4811     for (j=0; nextaj<anzi; j++) {
4812       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4813         ba_i[j] += aa[nextaj++];
4814       }
4815     }
4816 
4817     /* add received vals into ba */
4818     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4819       /* i-th row */
4820       if (i == *nextrow[k]) {
4821         anzi   = *(nextai[k]+1) - *nextai[k];
4822         aj     = buf_rj[k] + *(nextai[k]);
4823         aa     = abuf_r[k] + *(nextai[k]);
4824         nextaj = 0;
4825         for (j=0; nextaj<anzi; j++) {
4826           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4827             ba_i[j] += aa[nextaj++];
4828           }
4829         }
4830         nextrow[k]++; nextai[k]++;
4831       }
4832     }
4833     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4834   }
4835   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4836   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4837 
4838   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4839   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4840   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4841   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4842   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4843   PetscFunctionReturn(0);
4844 }
4845 
4846 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4847 {
4848   PetscErrorCode      ierr;
4849   Mat                 B_mpi;
4850   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4851   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4852   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4853   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4854   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4855   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4856   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4857   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4858   MPI_Status          *status;
4859   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4860   PetscBT             lnkbt;
4861   Mat_Merge_SeqsToMPI *merge;
4862   PetscContainer      container;
4863 
4864   PetscFunctionBegin;
4865   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4866 
4867   /* make sure it is a PETSc comm */
4868   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4869   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4870   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4871 
4872   ierr = PetscNew(&merge);CHKERRQ(ierr);
4873   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4874 
4875   /* determine row ownership */
4876   /*---------------------------------------------------------*/
4877   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4878   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4879   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4880   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4881   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4882   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4883   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4884 
4885   m      = merge->rowmap->n;
4886   owners = merge->rowmap->range;
4887 
4888   /* determine the number of messages to send, their lengths */
4889   /*---------------------------------------------------------*/
4890   len_s = merge->len_s;
4891 
4892   len          = 0; /* length of buf_si[] */
4893   merge->nsend = 0;
4894   for (proc=0; proc<size; proc++) {
4895     len_si[proc] = 0;
4896     if (proc == rank) {
4897       len_s[proc] = 0;
4898     } else {
4899       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4900       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4901     }
4902     if (len_s[proc]) {
4903       merge->nsend++;
4904       nrows = 0;
4905       for (i=owners[proc]; i<owners[proc+1]; i++) {
4906         if (ai[i+1] > ai[i]) nrows++;
4907       }
4908       len_si[proc] = 2*(nrows+1);
4909       len         += len_si[proc];
4910     }
4911   }
4912 
4913   /* determine the number and length of messages to receive for ij-structure */
4914   /*-------------------------------------------------------------------------*/
4915   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4916   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4917 
4918   /* post the Irecv of j-structure */
4919   /*-------------------------------*/
4920   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4921   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4922 
4923   /* post the Isend of j-structure */
4924   /*--------------------------------*/
4925   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4926 
4927   for (proc=0, k=0; proc<size; proc++) {
4928     if (!len_s[proc]) continue;
4929     i    = owners[proc];
4930     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4931     k++;
4932   }
4933 
4934   /* receives and sends of j-structure are complete */
4935   /*------------------------------------------------*/
4936   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4937   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4938 
4939   /* send and recv i-structure */
4940   /*---------------------------*/
4941   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4942   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4943 
4944   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4945   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4946   for (proc=0,k=0; proc<size; proc++) {
4947     if (!len_s[proc]) continue;
4948     /* form outgoing message for i-structure:
4949          buf_si[0]:                 nrows to be sent
4950                [1:nrows]:           row index (global)
4951                [nrows+1:2*nrows+1]: i-structure index
4952     */
4953     /*-------------------------------------------*/
4954     nrows       = len_si[proc]/2 - 1;
4955     buf_si_i    = buf_si + nrows+1;
4956     buf_si[0]   = nrows;
4957     buf_si_i[0] = 0;
4958     nrows       = 0;
4959     for (i=owners[proc]; i<owners[proc+1]; i++) {
4960       anzi = ai[i+1] - ai[i];
4961       if (anzi) {
4962         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4963         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4964         nrows++;
4965       }
4966     }
4967     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4968     k++;
4969     buf_si += len_si[proc];
4970   }
4971 
4972   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4973   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4974 
4975   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4976   for (i=0; i<merge->nrecv; i++) {
4977     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4978   }
4979 
4980   ierr = PetscFree(len_si);CHKERRQ(ierr);
4981   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4982   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4983   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4984   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4985   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4986   ierr = PetscFree(status);CHKERRQ(ierr);
4987 
4988   /* compute a local seq matrix in each processor */
4989   /*----------------------------------------------*/
4990   /* allocate bi array and free space for accumulating nonzero column info */
4991   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4992   bi[0] = 0;
4993 
4994   /* create and initialize a linked list */
4995   nlnk = N+1;
4996   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4997 
4998   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4999   len  = ai[owners[rank+1]] - ai[owners[rank]];
5000   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
5001 
5002   current_space = free_space;
5003 
5004   /* determine symbolic info for each local row */
5005   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5006 
5007   for (k=0; k<merge->nrecv; k++) {
5008     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5009     nrows       = *buf_ri_k[k];
5010     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5011     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5012   }
5013 
5014   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5015   len  = 0;
5016   for (i=0; i<m; i++) {
5017     bnzi = 0;
5018     /* add local non-zero cols of this proc's seqmat into lnk */
5019     arow  = owners[rank] + i;
5020     anzi  = ai[arow+1] - ai[arow];
5021     aj    = a->j + ai[arow];
5022     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5023     bnzi += nlnk;
5024     /* add received col data into lnk */
5025     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5026       if (i == *nextrow[k]) { /* i-th row */
5027         anzi  = *(nextai[k]+1) - *nextai[k];
5028         aj    = buf_rj[k] + *nextai[k];
5029         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5030         bnzi += nlnk;
5031         nextrow[k]++; nextai[k]++;
5032       }
5033     }
5034     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5035 
5036     /* if free space is not available, make more free space */
5037     if (current_space->local_remaining<bnzi) {
5038       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5039       nspacedouble++;
5040     }
5041     /* copy data into free space, then initialize lnk */
5042     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5043     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5044 
5045     current_space->array           += bnzi;
5046     current_space->local_used      += bnzi;
5047     current_space->local_remaining -= bnzi;
5048 
5049     bi[i+1] = bi[i] + bnzi;
5050   }
5051 
5052   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5053 
5054   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5055   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5056   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5057 
5058   /* create symbolic parallel matrix B_mpi */
5059   /*---------------------------------------*/
5060   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5061   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5062   if (n==PETSC_DECIDE) {
5063     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5064   } else {
5065     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5066   }
5067   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5068   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5069   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5070   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5071   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5072 
5073   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5074   B_mpi->assembled  = PETSC_FALSE;
5075   merge->bi         = bi;
5076   merge->bj         = bj;
5077   merge->buf_ri     = buf_ri;
5078   merge->buf_rj     = buf_rj;
5079   merge->coi        = NULL;
5080   merge->coj        = NULL;
5081   merge->owners_co  = NULL;
5082 
5083   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5084 
5085   /* attach the supporting struct to B_mpi for reuse */
5086   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5087   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5088   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5089   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5090   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5091   *mpimat = B_mpi;
5092 
5093   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5094   PetscFunctionReturn(0);
5095 }
5096 
5097 /*@C
5098       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5099                  matrices from each processor
5100 
5101     Collective
5102 
5103    Input Parameters:
5104 +    comm - the communicators the parallel matrix will live on
5105 .    seqmat - the input sequential matrices
5106 .    m - number of local rows (or PETSC_DECIDE)
5107 .    n - number of local columns (or PETSC_DECIDE)
5108 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 
5110    Output Parameter:
5111 .    mpimat - the parallel matrix generated
5112 
5113     Level: advanced
5114 
5115    Notes:
5116      The dimensions of the sequential matrix in each processor MUST be the same.
5117      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5118      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5119 @*/
5120 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5121 {
5122   PetscErrorCode ierr;
5123   PetscMPIInt    size;
5124 
5125   PetscFunctionBegin;
5126   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5127   if (size == 1) {
5128     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5129     if (scall == MAT_INITIAL_MATRIX) {
5130       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5131     } else {
5132       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5133     }
5134     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5135     PetscFunctionReturn(0);
5136   }
5137   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5138   if (scall == MAT_INITIAL_MATRIX) {
5139     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5140   }
5141   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5142   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5143   PetscFunctionReturn(0);
5144 }
5145 
5146 /*@
5147      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5148           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5149           with MatGetSize()
5150 
5151     Not Collective
5152 
5153    Input Parameters:
5154 +    A - the matrix
5155 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5156 
5157    Output Parameter:
5158 .    A_loc - the local sequential matrix generated
5159 
5160     Level: developer
5161 
5162    Notes:
5163      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5164      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5165      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5166      modify the values of the returned A_loc.
5167 
5168 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5169 
5170 @*/
5171 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5172 {
5173   PetscErrorCode ierr;
5174   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5175   Mat_SeqAIJ     *mat,*a,*b;
5176   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5177   MatScalar      *aa,*ba,*cam;
5178   PetscScalar    *ca;
5179   PetscMPIInt    size;
5180   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5181   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5182   PetscBool      match;
5183 
5184   PetscFunctionBegin;
5185   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5186   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5187   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5188   if (size == 1) {
5189     if (scall == MAT_INITIAL_MATRIX) {
5190       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5191       *A_loc = mpimat->A;
5192     } else if (scall == MAT_REUSE_MATRIX) {
5193       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5194     }
5195     PetscFunctionReturn(0);
5196   }
5197 
5198   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5199   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5200   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5201   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5202   aa = a->a; ba = b->a;
5203   if (scall == MAT_INITIAL_MATRIX) {
5204     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5205     ci[0] = 0;
5206     for (i=0; i<am; i++) {
5207       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5208     }
5209     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5210     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5211     k    = 0;
5212     for (i=0; i<am; i++) {
5213       ncols_o = bi[i+1] - bi[i];
5214       ncols_d = ai[i+1] - ai[i];
5215       /* off-diagonal portion of A */
5216       for (jo=0; jo<ncols_o; jo++) {
5217         col = cmap[*bj];
5218         if (col >= cstart) break;
5219         cj[k]   = col; bj++;
5220         ca[k++] = *ba++;
5221       }
5222       /* diagonal portion of A */
5223       for (j=0; j<ncols_d; j++) {
5224         cj[k]   = cstart + *aj++;
5225         ca[k++] = *aa++;
5226       }
5227       /* off-diagonal portion of A */
5228       for (j=jo; j<ncols_o; j++) {
5229         cj[k]   = cmap[*bj++];
5230         ca[k++] = *ba++;
5231       }
5232     }
5233     /* put together the new matrix */
5234     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5235     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5236     /* Since these are PETSc arrays, change flags to free them as necessary. */
5237     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5238     mat->free_a  = PETSC_TRUE;
5239     mat->free_ij = PETSC_TRUE;
5240     mat->nonew   = 0;
5241   } else if (scall == MAT_REUSE_MATRIX) {
5242     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5243     ci = mat->i; cj = mat->j; cam = mat->a;
5244     for (i=0; i<am; i++) {
5245       /* off-diagonal portion of A */
5246       ncols_o = bi[i+1] - bi[i];
5247       for (jo=0; jo<ncols_o; jo++) {
5248         col = cmap[*bj];
5249         if (col >= cstart) break;
5250         *cam++ = *ba++; bj++;
5251       }
5252       /* diagonal portion of A */
5253       ncols_d = ai[i+1] - ai[i];
5254       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5255       /* off-diagonal portion of A */
5256       for (j=jo; j<ncols_o; j++) {
5257         *cam++ = *ba++; bj++;
5258       }
5259     }
5260   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5261   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5262   PetscFunctionReturn(0);
5263 }
5264 
5265 /*@C
5266      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5267 
5268     Not Collective
5269 
5270    Input Parameters:
5271 +    A - the matrix
5272 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5273 -    row, col - index sets of rows and columns to extract (or NULL)
5274 
5275    Output Parameter:
5276 .    A_loc - the local sequential matrix generated
5277 
5278     Level: developer
5279 
5280 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5281 
5282 @*/
5283 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5284 {
5285   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5286   PetscErrorCode ierr;
5287   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5288   IS             isrowa,iscola;
5289   Mat            *aloc;
5290   PetscBool      match;
5291 
5292   PetscFunctionBegin;
5293   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5294   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5295   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5296   if (!row) {
5297     start = A->rmap->rstart; end = A->rmap->rend;
5298     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5299   } else {
5300     isrowa = *row;
5301   }
5302   if (!col) {
5303     start = A->cmap->rstart;
5304     cmap  = a->garray;
5305     nzA   = a->A->cmap->n;
5306     nzB   = a->B->cmap->n;
5307     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5308     ncols = 0;
5309     for (i=0; i<nzB; i++) {
5310       if (cmap[i] < start) idx[ncols++] = cmap[i];
5311       else break;
5312     }
5313     imark = i;
5314     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5315     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5316     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5317   } else {
5318     iscola = *col;
5319   }
5320   if (scall != MAT_INITIAL_MATRIX) {
5321     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5322     aloc[0] = *A_loc;
5323   }
5324   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5325   if (!col) { /* attach global id of condensed columns */
5326     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5327   }
5328   *A_loc = aloc[0];
5329   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5330   if (!row) {
5331     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5332   }
5333   if (!col) {
5334     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5335   }
5336   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5337   PetscFunctionReturn(0);
5338 }
5339 
5340 /*
5341  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5342  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5343  * on a global size.
5344  * */
5345 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5346 {
5347   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5348   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5349   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5350   PetscMPIInt              owner;
5351   PetscSFNode              *iremote,*oiremote;
5352   const PetscInt           *lrowindices;
5353   PetscErrorCode           ierr;
5354   PetscSF                  sf,osf;
5355   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5356   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5357   MPI_Comm                 comm;
5358   ISLocalToGlobalMapping   mapping;
5359 
5360   PetscFunctionBegin;
5361   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5362   /* plocalsize is the number of roots
5363    * nrows is the number of leaves
5364    * */
5365   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5366   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5367   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5368   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5369   for (i=0;i<nrows;i++) {
5370     /* Find a remote index and an owner for a row
5371      * The row could be local or remote
5372      * */
5373     owner = 0;
5374     lidx  = 0;
5375     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5376     iremote[i].index = lidx;
5377     iremote[i].rank  = owner;
5378   }
5379   /* Create SF to communicate how many nonzero columns for each row */
5380   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5381   /* SF will figure out the number of nonzero colunms for each row, and their
5382    * offsets
5383    * */
5384   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5385   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5386   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5387 
5388   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5389   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5390   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5391   roffsets[0] = 0;
5392   roffsets[1] = 0;
5393   for (i=0;i<plocalsize;i++) {
5394     /* diag */
5395     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5396     /* off diag */
5397     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5398     /* compute offsets so that we relative location for each row */
5399     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5400     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5401   }
5402   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5403   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5404   /* 'r' means root, and 'l' means leaf */
5405   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5406   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5407   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5408   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5409   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5410   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5411   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5412   dntotalcols = 0;
5413   ontotalcols = 0;
5414   ncol = 0;
5415   for (i=0;i<nrows;i++) {
5416     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5417     ncol = PetscMax(pnnz[i],ncol);
5418     /* diag */
5419     dntotalcols += nlcols[i*2+0];
5420     /* off diag */
5421     ontotalcols += nlcols[i*2+1];
5422   }
5423   /* We do not need to figure the right number of columns
5424    * since all the calculations will be done by going through the raw data
5425    * */
5426   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5427   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5428   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5429   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5430   /* diag */
5431   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5432   /* off diag */
5433   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5434   /* diag */
5435   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5436   /* off diag */
5437   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5438   dntotalcols = 0;
5439   ontotalcols = 0;
5440   ntotalcols  = 0;
5441   for (i=0;i<nrows;i++) {
5442     owner = 0;
5443     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5444     /* Set iremote for diag matrix */
5445     for (j=0;j<nlcols[i*2+0];j++) {
5446       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5447       iremote[dntotalcols].rank    = owner;
5448       /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5449       ilocal[dntotalcols++]        = ntotalcols++;
5450     }
5451     /* off diag */
5452     for (j=0;j<nlcols[i*2+1];j++) {
5453       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5454       oiremote[ontotalcols].rank    = owner;
5455       oilocal[ontotalcols++]        = ntotalcols++;
5456     }
5457   }
5458   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5459   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5460   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5461   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5462   /* P serves as roots and P_oth is leaves
5463    * Diag matrix
5464    * */
5465   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5466   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5467   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5468 
5469   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5470   /* Off diag */
5471   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5472   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5473   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5474   /* We operate on the matrix internal data for saving memory */
5475   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5476   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5477   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5478   /* Convert to global indices for diag matrix */
5479   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5480   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5481   /* We want P_oth store global indices */
5482   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5483   /* Use memory scalable approach */
5484   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5485   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5486   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5487   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5488   /* Convert back to local indices */
5489   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5490   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5491   nout = 0;
5492   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5493   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5494   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5495   /* Exchange values */
5496   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5497   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5498   /* Stop PETSc from shrinking memory */
5499   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5500   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5501   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5502   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5503   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5504   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5505   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5506   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5507   PetscFunctionReturn(0);
5508 }
5509 
5510 /*
5511  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5512  * This supports MPIAIJ and MAIJ
5513  * */
5514 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5515 {
5516   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5517   Mat_SeqAIJ            *p_oth;
5518   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5519   IS                    rows,map;
5520   PetscHMapI            hamp;
5521   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5522   MPI_Comm              comm;
5523   PetscSF               sf,osf;
5524   PetscBool             has;
5525   PetscErrorCode        ierr;
5526 
5527   PetscFunctionBegin;
5528   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5529   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5530   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5531    *  and then create a submatrix (that often is an overlapping matrix)
5532    * */
5533   if (reuse == MAT_INITIAL_MATRIX) {
5534     /* Use a hash table to figure out unique keys */
5535     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5536     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5537     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5538     count = 0;
5539     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5540     for (i=0;i<a->B->cmap->n;i++) {
5541       key  = a->garray[i]/dof;
5542       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5543       if (!has) {
5544         mapping[i] = count;
5545         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5546       } else {
5547         /* Current 'i' has the same value the previous step */
5548         mapping[i] = count-1;
5549       }
5550     }
5551     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5552     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5553     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5554     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5555     off = 0;
5556     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5557     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5558     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5559     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5560     /* In case, the matrix was already created but users want to recreate the matrix */
5561     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5562     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5563     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5564     ierr = ISDestroy(&map);CHKERRQ(ierr);
5565     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5566   } else if (reuse == MAT_REUSE_MATRIX) {
5567     /* If matrix was already created, we simply update values using SF objects
5568      * that as attached to the matrix ealier.
5569      *  */
5570     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5571     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5572     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5573     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5574     /* Update values in place */
5575     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5576     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5577     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5578     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5579   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5580   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5581   PetscFunctionReturn(0);
5582 }
5583 
5584 /*@C
5585     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5586 
5587     Collective on Mat
5588 
5589    Input Parameters:
5590 +    A,B - the matrices in mpiaij format
5591 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5592 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5593 
5594    Output Parameter:
5595 +    rowb, colb - index sets of rows and columns of B to extract
5596 -    B_seq - the sequential matrix generated
5597 
5598     Level: developer
5599 
5600 @*/
5601 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5602 {
5603   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5604   PetscErrorCode ierr;
5605   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5606   IS             isrowb,iscolb;
5607   Mat            *bseq=NULL;
5608 
5609   PetscFunctionBegin;
5610   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5611     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5612   }
5613   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5614 
5615   if (scall == MAT_INITIAL_MATRIX) {
5616     start = A->cmap->rstart;
5617     cmap  = a->garray;
5618     nzA   = a->A->cmap->n;
5619     nzB   = a->B->cmap->n;
5620     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5621     ncols = 0;
5622     for (i=0; i<nzB; i++) {  /* row < local row index */
5623       if (cmap[i] < start) idx[ncols++] = cmap[i];
5624       else break;
5625     }
5626     imark = i;
5627     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5628     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5629     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5630     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5631   } else {
5632     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5633     isrowb  = *rowb; iscolb = *colb;
5634     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5635     bseq[0] = *B_seq;
5636   }
5637   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5638   *B_seq = bseq[0];
5639   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5640   if (!rowb) {
5641     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5642   } else {
5643     *rowb = isrowb;
5644   }
5645   if (!colb) {
5646     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5647   } else {
5648     *colb = iscolb;
5649   }
5650   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5651   PetscFunctionReturn(0);
5652 }
5653 
5654 /*
5655     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5656     of the OFF-DIAGONAL portion of local A
5657 
5658     Collective on Mat
5659 
5660    Input Parameters:
5661 +    A,B - the matrices in mpiaij format
5662 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5663 
5664    Output Parameter:
5665 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5666 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5667 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5668 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5669 
5670     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5671      for this matrix. This is not desirable..
5672 
5673     Level: developer
5674 
5675 */
5676 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5677 {
5678   PetscErrorCode         ierr;
5679   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5680   Mat_SeqAIJ             *b_oth;
5681   VecScatter             ctx;
5682   MPI_Comm               comm;
5683   const PetscMPIInt      *rprocs,*sprocs;
5684   const PetscInt         *srow,*rstarts,*sstarts;
5685   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5686   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5687   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5688   MPI_Request            *rwaits = NULL,*swaits = NULL;
5689   MPI_Status             rstatus;
5690   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5691   PETSC_UNUSED PetscMPIInt jj;
5692 
5693   PetscFunctionBegin;
5694   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5695   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5696 
5697   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5698     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5699   }
5700   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5701   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5702 
5703   if (size == 1) {
5704     startsj_s = NULL;
5705     bufa_ptr  = NULL;
5706     *B_oth    = NULL;
5707     PetscFunctionReturn(0);
5708   }
5709 
5710   ctx = a->Mvctx;
5711   tag = ((PetscObject)ctx)->tag;
5712 
5713   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5714   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5715   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5716   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5717   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5718   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5719 
5720   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5721   if (scall == MAT_INITIAL_MATRIX) {
5722     /* i-array */
5723     /*---------*/
5724     /*  post receives */
5725     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5726     for (i=0; i<nrecvs; i++) {
5727       rowlen = rvalues + rstarts[i]*rbs;
5728       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5729       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5730     }
5731 
5732     /* pack the outgoing message */
5733     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5734 
5735     sstartsj[0] = 0;
5736     rstartsj[0] = 0;
5737     len         = 0; /* total length of j or a array to be sent */
5738     if (nsends) {
5739       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5740       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5741     }
5742     for (i=0; i<nsends; i++) {
5743       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5744       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5745       for (j=0; j<nrows; j++) {
5746         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5747         for (l=0; l<sbs; l++) {
5748           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5749 
5750           rowlen[j*sbs+l] = ncols;
5751 
5752           len += ncols;
5753           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5754         }
5755         k++;
5756       }
5757       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5758 
5759       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5760     }
5761     /* recvs and sends of i-array are completed */
5762     i = nrecvs;
5763     while (i--) {
5764       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5765     }
5766     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5767     ierr = PetscFree(svalues);CHKERRQ(ierr);
5768 
5769     /* allocate buffers for sending j and a arrays */
5770     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5771     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5772 
5773     /* create i-array of B_oth */
5774     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5775 
5776     b_othi[0] = 0;
5777     len       = 0; /* total length of j or a array to be received */
5778     k         = 0;
5779     for (i=0; i<nrecvs; i++) {
5780       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5781       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5782       for (j=0; j<nrows; j++) {
5783         b_othi[k+1] = b_othi[k] + rowlen[j];
5784         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5785         k++;
5786       }
5787       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5788     }
5789     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5790 
5791     /* allocate space for j and a arrrays of B_oth */
5792     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5793     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5794 
5795     /* j-array */
5796     /*---------*/
5797     /*  post receives of j-array */
5798     for (i=0; i<nrecvs; i++) {
5799       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5800       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5801     }
5802 
5803     /* pack the outgoing message j-array */
5804     if (nsends) k = sstarts[0];
5805     for (i=0; i<nsends; i++) {
5806       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5807       bufJ  = bufj+sstartsj[i];
5808       for (j=0; j<nrows; j++) {
5809         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5810         for (ll=0; ll<sbs; ll++) {
5811           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5812           for (l=0; l<ncols; l++) {
5813             *bufJ++ = cols[l];
5814           }
5815           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5816         }
5817       }
5818       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5819     }
5820 
5821     /* recvs and sends of j-array are completed */
5822     i = nrecvs;
5823     while (i--) {
5824       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5825     }
5826     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5827   } else if (scall == MAT_REUSE_MATRIX) {
5828     sstartsj = *startsj_s;
5829     rstartsj = *startsj_r;
5830     bufa     = *bufa_ptr;
5831     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5832     b_otha   = b_oth->a;
5833   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5834 
5835   /* a-array */
5836   /*---------*/
5837   /*  post receives of a-array */
5838   for (i=0; i<nrecvs; i++) {
5839     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5840     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5841   }
5842 
5843   /* pack the outgoing message a-array */
5844   if (nsends) k = sstarts[0];
5845   for (i=0; i<nsends; i++) {
5846     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5847     bufA  = bufa+sstartsj[i];
5848     for (j=0; j<nrows; j++) {
5849       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5850       for (ll=0; ll<sbs; ll++) {
5851         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5852         for (l=0; l<ncols; l++) {
5853           *bufA++ = vals[l];
5854         }
5855         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5856       }
5857     }
5858     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5859   }
5860   /* recvs and sends of a-array are completed */
5861   i = nrecvs;
5862   while (i--) {
5863     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5864   }
5865   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5866   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5867 
5868   if (scall == MAT_INITIAL_MATRIX) {
5869     /* put together the new matrix */
5870     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5871 
5872     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5873     /* Since these are PETSc arrays, change flags to free them as necessary. */
5874     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5875     b_oth->free_a  = PETSC_TRUE;
5876     b_oth->free_ij = PETSC_TRUE;
5877     b_oth->nonew   = 0;
5878 
5879     ierr = PetscFree(bufj);CHKERRQ(ierr);
5880     if (!startsj_s || !bufa_ptr) {
5881       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5882       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5883     } else {
5884       *startsj_s = sstartsj;
5885       *startsj_r = rstartsj;
5886       *bufa_ptr  = bufa;
5887     }
5888   }
5889 
5890   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5891   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5892   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5893   PetscFunctionReturn(0);
5894 }
5895 
5896 /*@C
5897   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5898 
5899   Not Collective
5900 
5901   Input Parameters:
5902 . A - The matrix in mpiaij format
5903 
5904   Output Parameter:
5905 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5906 . colmap - A map from global column index to local index into lvec
5907 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5908 
5909   Level: developer
5910 
5911 @*/
5912 #if defined(PETSC_USE_CTABLE)
5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5914 #else
5915 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5916 #endif
5917 {
5918   Mat_MPIAIJ *a;
5919 
5920   PetscFunctionBegin;
5921   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5922   PetscValidPointer(lvec, 2);
5923   PetscValidPointer(colmap, 3);
5924   PetscValidPointer(multScatter, 4);
5925   a = (Mat_MPIAIJ*) A->data;
5926   if (lvec) *lvec = a->lvec;
5927   if (colmap) *colmap = a->colmap;
5928   if (multScatter) *multScatter = a->Mvctx;
5929   PetscFunctionReturn(0);
5930 }
5931 
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5935 #if defined(PETSC_HAVE_MKL_SPARSE)
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5937 #endif
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5940 #if defined(PETSC_HAVE_ELEMENTAL)
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5942 #endif
5943 #if defined(PETSC_HAVE_SCALAPACK)
5944 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5945 #endif
5946 #if defined(PETSC_HAVE_HYPRE)
5947 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5948 #endif
5949 #if defined(PETSC_HAVE_CUDA)
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5951 #endif
5952 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5954 #endif
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5956 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5957 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5958 
5959 /*
5960     Computes (B'*A')' since computing B*A directly is untenable
5961 
5962                n                       p                          p
5963         [             ]       [             ]         [                 ]
5964       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5965         [             ]       [             ]         [                 ]
5966 
5967 */
5968 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5969 {
5970   PetscErrorCode ierr;
5971   Mat            At,Bt,Ct;
5972 
5973   PetscFunctionBegin;
5974   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5975   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5976   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5977   ierr = MatDestroy(&At);CHKERRQ(ierr);
5978   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5979   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5980   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5981   PetscFunctionReturn(0);
5982 }
5983 
5984 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5985 {
5986   PetscErrorCode ierr;
5987   PetscBool      cisdense;
5988 
5989   PetscFunctionBegin;
5990   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5991   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5992   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5993   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5994   if (!cisdense) {
5995     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5996   }
5997   ierr = MatSetUp(C);CHKERRQ(ierr);
5998 
5999   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6000   PetscFunctionReturn(0);
6001 }
6002 
6003 /* ----------------------------------------------------------------*/
6004 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6005 {
6006   Mat_Product *product = C->product;
6007   Mat         A = product->A,B=product->B;
6008 
6009   PetscFunctionBegin;
6010   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6011     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6012 
6013   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6014   C->ops->productsymbolic = MatProductSymbolic_AB;
6015   PetscFunctionReturn(0);
6016 }
6017 
6018 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6019 {
6020   PetscErrorCode ierr;
6021   Mat_Product    *product = C->product;
6022 
6023   PetscFunctionBegin;
6024   if (product->type == MATPRODUCT_AB) {
6025     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6026   }
6027   PetscFunctionReturn(0);
6028 }
6029 /* ----------------------------------------------------------------*/
6030 
6031 /*MC
6032    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6033 
6034    Options Database Keys:
6035 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6036 
6037    Level: beginner
6038 
6039    Notes:
6040     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6041     in this case the values associated with the rows and columns one passes in are set to zero
6042     in the matrix
6043 
6044     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
6045     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6046 
6047 .seealso: MatCreateAIJ()
6048 M*/
6049 
6050 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6051 {
6052   Mat_MPIAIJ     *b;
6053   PetscErrorCode ierr;
6054   PetscMPIInt    size;
6055 
6056   PetscFunctionBegin;
6057   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6058 
6059   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6060   B->data       = (void*)b;
6061   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6062   B->assembled  = PETSC_FALSE;
6063   B->insertmode = NOT_SET_VALUES;
6064   b->size       = size;
6065 
6066   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6067 
6068   /* build cache for off array entries formed */
6069   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6070 
6071   b->donotstash  = PETSC_FALSE;
6072   b->colmap      = NULL;
6073   b->garray      = NULL;
6074   b->roworiented = PETSC_TRUE;
6075 
6076   /* stuff used for matrix vector multiply */
6077   b->lvec  = NULL;
6078   b->Mvctx = NULL;
6079 
6080   /* stuff for MatGetRow() */
6081   b->rowindices   = NULL;
6082   b->rowvalues    = NULL;
6083   b->getrowactive = PETSC_FALSE;
6084 
6085   /* flexible pointer used in CUSPARSE classes */
6086   b->spptr = NULL;
6087 
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6098 #if defined(PETSC_HAVE_CUDA)
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6100 #endif
6101 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6103 #endif
6104 #if defined(PETSC_HAVE_MKL_SPARSE)
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6106 #endif
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6110 #if defined(PETSC_HAVE_ELEMENTAL)
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6112 #endif
6113 #if defined(PETSC_HAVE_SCALAPACK)
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6115 #endif
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6118 #if defined(PETSC_HAVE_HYPRE)
6119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6120   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6121 #endif
6122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6124   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6125   PetscFunctionReturn(0);
6126 }
6127 
6128 /*@C
6129      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6130          and "off-diagonal" part of the matrix in CSR format.
6131 
6132    Collective
6133 
6134    Input Parameters:
6135 +  comm - MPI communicator
6136 .  m - number of local rows (Cannot be PETSC_DECIDE)
6137 .  n - This value should be the same as the local size used in creating the
6138        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6139        calculated if N is given) For square matrices n is almost always m.
6140 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6141 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6142 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6143 .   j - column indices
6144 .   a - matrix values
6145 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6146 .   oj - column indices
6147 -   oa - matrix values
6148 
6149    Output Parameter:
6150 .   mat - the matrix
6151 
6152    Level: advanced
6153 
6154    Notes:
6155        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6156        must free the arrays once the matrix has been destroyed and not before.
6157 
6158        The i and j indices are 0 based
6159 
6160        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6161 
6162        This sets local rows and cannot be used to set off-processor values.
6163 
6164        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6165        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6166        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6167        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6168        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6169        communication if it is known that only local entries will be set.
6170 
6171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6172           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6173 @*/
6174 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6175 {
6176   PetscErrorCode ierr;
6177   Mat_MPIAIJ     *maij;
6178 
6179   PetscFunctionBegin;
6180   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6181   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6182   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6183   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6184   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6185   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6186   maij = (Mat_MPIAIJ*) (*mat)->data;
6187 
6188   (*mat)->preallocated = PETSC_TRUE;
6189 
6190   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6191   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6192 
6193   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6195 
6196   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6197   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6198   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6200 
6201   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6202   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6203   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6204   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6205   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6206   PetscFunctionReturn(0);
6207 }
6208 
6209 /*
6210     Special version for direct calls from Fortran
6211 */
6212 #include <petsc/private/fortranimpl.h>
6213 
6214 /* Change these macros so can be used in void function */
6215 #undef CHKERRQ
6216 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6217 #undef SETERRQ2
6218 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6219 #undef SETERRQ3
6220 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6221 #undef SETERRQ
6222 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6223 
6224 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6225 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6226 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6227 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6228 #else
6229 #endif
6230 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6231 {
6232   Mat            mat  = *mmat;
6233   PetscInt       m    = *mm, n = *mn;
6234   InsertMode     addv = *maddv;
6235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6236   PetscScalar    value;
6237   PetscErrorCode ierr;
6238 
6239   MatCheckPreallocated(mat,1);
6240   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6241   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6242   {
6243     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6244     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6245     PetscBool roworiented = aij->roworiented;
6246 
6247     /* Some Variables required in the macro */
6248     Mat        A                    = aij->A;
6249     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6250     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6251     MatScalar  *aa                  = a->a;
6252     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6253     Mat        B                    = aij->B;
6254     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6255     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6256     MatScalar  *ba                  = b->a;
6257     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6258      * cannot use "#if defined" inside a macro. */
6259     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6260 
6261     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6262     PetscInt  nonew = a->nonew;
6263     MatScalar *ap1,*ap2;
6264 
6265     PetscFunctionBegin;
6266     for (i=0; i<m; i++) {
6267       if (im[i] < 0) continue;
6268       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6269       if (im[i] >= rstart && im[i] < rend) {
6270         row      = im[i] - rstart;
6271         lastcol1 = -1;
6272         rp1      = aj + ai[row];
6273         ap1      = aa + ai[row];
6274         rmax1    = aimax[row];
6275         nrow1    = ailen[row];
6276         low1     = 0;
6277         high1    = nrow1;
6278         lastcol2 = -1;
6279         rp2      = bj + bi[row];
6280         ap2      = ba + bi[row];
6281         rmax2    = bimax[row];
6282         nrow2    = bilen[row];
6283         low2     = 0;
6284         high2    = nrow2;
6285 
6286         for (j=0; j<n; j++) {
6287           if (roworiented) value = v[i*n+j];
6288           else value = v[i+j*m];
6289           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6290           if (in[j] >= cstart && in[j] < cend) {
6291             col = in[j] - cstart;
6292             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6293 #if defined(PETSC_HAVE_DEVICE)
6294             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6295 #endif
6296           } else if (in[j] < 0) continue;
6297           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6298             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6299             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6300           } else {
6301             if (mat->was_assembled) {
6302               if (!aij->colmap) {
6303                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6304               }
6305 #if defined(PETSC_USE_CTABLE)
6306               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6307               col--;
6308 #else
6309               col = aij->colmap[in[j]] - 1;
6310 #endif
6311               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6312                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6313                 col  =  in[j];
6314                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6315                 B        = aij->B;
6316                 b        = (Mat_SeqAIJ*)B->data;
6317                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6318                 rp2      = bj + bi[row];
6319                 ap2      = ba + bi[row];
6320                 rmax2    = bimax[row];
6321                 nrow2    = bilen[row];
6322                 low2     = 0;
6323                 high2    = nrow2;
6324                 bm       = aij->B->rmap->n;
6325                 ba       = b->a;
6326                 inserted = PETSC_FALSE;
6327               }
6328             } else col = in[j];
6329             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6330 #if defined(PETSC_HAVE_DEVICE)
6331             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6332 #endif
6333           }
6334         }
6335       } else if (!aij->donotstash) {
6336         if (roworiented) {
6337           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6338         } else {
6339           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6340         }
6341       }
6342     }
6343   }
6344   PetscFunctionReturnVoid();
6345 }
6346