xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 930e68a5a1dbab7612595fd12ba2e3af4e1d5d80)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The matrix also automatically
23    switches over to using inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
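
/*
   Example (a minimal sketch added for illustration; m, n and the per-row nonzero counts 5 and 2 are
   placeholders, not values used in this file): creating an AIJ matrix and calling both preallocation
   routines as recommended above.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Only the call matching the communicator size takes effect, so both may be made unconditionally.
*/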
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
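
/*
   A minimal sketch (not from this file; A is assumed already created and sized): the CRL variant is
   usually selected at run time with -mat_type aijcrl together with MatSetFromOptions(), or directly in code with

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
*/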
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
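/*
   Returns in keptrows the locally owned rows (global numbering) that contain at least one stored
   nonzero value in either the diagonal (A) or off-diagonal (B) block; keptrows is left NULL when no
   process has a row containing only zero values.
*/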
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 /*
251     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253 
254     Only for square matrices
255 
256     Used by a preconditioner, hence PETSC_EXTERN
257 */
258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRMPI(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRMPI(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
299       }
300       /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRMPI(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr);
333       /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0 */
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local storage of the matrix
423 (for example, if garray = {3,7,12} then global column 7 maps to local column 1 of the off-diagonal block).
424 When PETSC_USE_CTABLE is used this is scalable, at a slightly higher hash-table lookup cost;
425 without it, it is not scalable (each process stores an order-N integer array) but is fast to access.
426 */
427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
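/*
  The two macros below insert (or add) one value into the diagonal block (A, suffix _A_Private) or the
  off-diagonal block (B, suffix _B_Private) of the MPIAIJ matrix: a short bisection followed by a linear
  scan locates the column, and MatSeqXAIJReallocateAIJ() is used when a new nonzero has to be created.
  They rely on the local variables (rp1,ap1,nrow1,..., rp2,ap2,nrow2,...) set up in MatSetValues_MPIAIJ().
*/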
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
463             /* Not sure whether LogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 /*
668     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
711     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800 
801   PetscFunctionBegin;
802   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
803 
804   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
805   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
806   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811 {
812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
813   PetscErrorCode ierr;
814   PetscMPIInt    n;
815   PetscInt       i,j,rstart,ncols,flg;
816   PetscInt       *row,*col;
817   PetscBool      other_disassembled;
818   PetscScalar    *val;
819 
820   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
821 
822   PetscFunctionBegin;
823   if (!aij->donotstash && !mat->nooffprocentries) {
824     while (1) {
825       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
826       if (!flg) break;
827 
828       for (i=0; i<n;) {
829         /* Now identify the consecutive vals belonging to the same row */
830         for (j=i,rstart=row[j]; j<n; j++) {
831           if (row[j] != rstart) break;
832         }
833         if (j < n) ncols = j-i;
834         else       ncols = n-i;
835         /* Now assemble all these values with a single function call */
836         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
837         i    = j;
838       }
839     }
840     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
841   }
842 #if defined(PETSC_HAVE_DEVICE)
843   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845   if (mat->boundtocpu) {
846     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
847     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
848   }
849 #endif
850   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
851   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
852 
853   /* determine if any processor has disassembled; if so, we must
854      also disassemble ourselves so that we may reassemble. */
855   /*
856      if nonzero structure of submatrix B cannot change then we know that
857      no processor disassembled thus we can skip this stuff
858   */
859   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
861     if (mat->was_assembled && !other_disassembled) {
862 #if defined(PETSC_HAVE_DEVICE)
863       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864 #endif
865       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
866     }
867   }
868   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
870   }
871   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
872 #if defined(PETSC_HAVE_DEVICE)
873   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874 #endif
875   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
876   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
877 
878   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
879 
880   aij->rowvalues = NULL;
881 
882   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
883 
884   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
888   }
889 #if defined(PETSC_HAVE_DEVICE)
890   mat->offloadmask = PETSC_OFFLOAD_BOTH;
891 #endif
892   PetscFunctionReturn(0);
893 }
894 
895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896 {
897   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
902   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
909   PetscObjectState sA, sB;
910   PetscInt        *lrows;
911   PetscInt         r, len;
912   PetscBool        cong, lch, gch;
913   PetscErrorCode   ierr;
914 
915   PetscFunctionBegin;
916   /* get locally owned rows */
917   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
918   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
919   /* fix right hand side if needed */
920   if (x && b) {
921     const PetscScalar *xx;
922     PetscScalar       *bb;
923 
924     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
926     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
927     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
929     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
930   }
931 
932   sA = mat->A->nonzerostate;
933   sB = mat->B->nonzerostate;
934 
935   if (diag != 0.0 && cong) {
936     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
937     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941     PetscInt   nnwA, nnwB;
942     PetscBool  nnzA, nnzB;
943 
944     nnwA = aijA->nonew;
945     nnwB = aijB->nonew;
946     nnzA = aijA->keepnonzeropattern;
947     nnzB = aijB->keepnonzeropattern;
948     if (!nnzA) {
949       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
950       aijA->nonew = 0;
951     }
952     if (!nnzB) {
953       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
954       aijB->nonew = 0;
955     }
956     /* Must zero here before the next loop */
957     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
959     for (r = 0; r < len; ++r) {
960       const PetscInt row = lrows[r] + A->rmap->rstart;
961       if (row >= A->cmap->N) continue;
962       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
963     }
964     aijA->nonew = nnwA;
965     aijB->nonew = nnwB;
966   } else {
967     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
969   }
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973 
974   /* reduce nonzerostate */
975   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   if (gch) A->nonzerostate++;
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982 {
983   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       n = A->rmap->n;
986   PetscInt          i,j,r,m,len = 0;
987   PetscInt          *lrows,*owners = A->rmap->range;
988   PetscMPIInt       p = 0;
989   PetscSFNode       *rrows;
990   PetscSF           sf;
991   const PetscScalar *xx;
992   PetscScalar       *bb,*mask;
993   Vec               xmask,lmask;
994   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
995   const PetscInt    *aj, *ii,*ridx;
996   PetscScalar       *aa;
997 
998   PetscFunctionBegin;
999   /* Create SF where leaves are input rows and roots are owned rows */
1000   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1001   for (r = 0; r < n; ++r) lrows[r] = -1;
1002   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1003   for (r = 0; r < N; ++r) {
1004     const PetscInt idx   = rows[r];
1005     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1008     }
1009     rrows[r].rank  = p;
1010     rrows[r].index = rows[r] - owners[p];
1011   }
1012   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1013   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1014   /* Collect flags for rows to be zeroed */
1015   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1016   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1017   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1018   /* Compress and put in row numbers */
1019   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1022   /* handle off diagonal part of matrix */
1023   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x && b) { /* this code is buggy when the row and column layout don't match */
1032     PetscBool cong;
1033 
1034     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1035     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040   }
1041   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<len; i++) {
1045     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1046   }
1047   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1048   if (aij->compressedrow.use) {
1049     m    = aij->compressedrow.nrows;
1050     ii   = aij->compressedrow.i;
1051     ridx = aij->compressedrow.rindex;
1052     for (i=0; i<m; i++) {
1053       n  = ii[i+1] - ii[i];
1054       aj = aij->j + ii[i];
1055       aa = aij->a + ii[i];
1056 
1057       for (j=0; j<n; j++) {
1058         if (PetscAbsScalar(mask[*aj])) {
1059           if (b) bb[*ridx] -= *aa*xx[*aj];
1060           *aa = 0.0;
1061         }
1062         aa++;
1063         aj++;
1064       }
1065       ridx++;
1066     }
1067   } else { /* do not use compressed row format */
1068     m = l->B->rmap->n;
1069     for (i=0; i<m; i++) {
1070       n  = ii[i+1] - ii[i];
1071       aj = aij->j + ii[i];
1072       aa = aij->a + ii[i];
1073       for (j=0; j<n; j++) {
1074         if (PetscAbsScalar(mask[*aj])) {
1075           if (b) bb[i] -= *aa*xx[*aj];
1076           *aa = 0.0;
1077         }
1078         aa++;
1079         aj++;
1080       }
1081     }
1082   }
1083   if (x && b) {
1084     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1085     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1086   }
1087   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1088   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1089   ierr = PetscFree(lrows);CHKERRQ(ierr);
1090 
1091   /* only change matrix nonzero state if pattern was allowed to be changed */
1092   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
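/*
  y = A*x for MPIAIJ: start the scatter of the needed off-process entries of x into a->lvec, apply the
  local diagonal block a->A while the communication proceeds, complete the scatter, and then add the
  contribution of the off-diagonal block a->B applied to a->lvec.
*/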
1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       nt;
1104   VecScatter     Mvctx = a->Mvctx;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129   PetscErrorCode ierr;
1130   VecScatter     Mvctx = a->Mvctx;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1141 {
1142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   /* do nondiagonal part */
1147   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1148   /* do local part */
1149   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1150   /* add partial results together */
1151   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1152   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscBool      lf;
1165   PetscMPIInt    size;
1166 
1167   PetscFunctionBegin;
1168   /* Easy test: symmetric diagonal block */
1169   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1171   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1172   if (!*f) PetscFunctionReturn(0);
1173   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1174   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1175   if (size == 1) PetscFunctionReturn(0);
1176 
1177   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1178   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1179   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1180   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1181   for (i=0; i<first; i++) notme[i] = i;
1182   for (i=last; i<M; i++) notme[i-last+first] = i;
1183   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1184   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1185   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1186   Aoff = Aoffs[0];
1187   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1188   Boff = Boffs[0];
1189   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1194   ierr = PetscFree(notme);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1199 {
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1208 {
1209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   /* do nondiagonal part */
1214   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1215   /* do local part */
1216   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1217   /* add partial results together */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /*
1224   This only works correctly for square matrices where the subblock A->A is the
1225    diagonal block
1226 */
1227 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1228 {
1229   PetscErrorCode ierr;
1230   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1231 
1232   PetscFunctionBegin;
1233   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1234   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1235   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1240 {
1241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1242   PetscErrorCode ierr;
1243 
1244   PetscFunctionBegin;
1245   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1246   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined(PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1276   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1277 
1278   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_CUDA)
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1290 #endif
1291 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1293 #endif
1294 #if defined(PETSC_HAVE_ELEMENTAL)
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1296 #endif
1297 #if defined(PETSC_HAVE_SCALAPACK)
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1299 #endif
1300 #if defined(PETSC_HAVE_HYPRE)
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1303 #endif
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1310 #if defined(PETSC_HAVE_MKL_SPARSE)
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1312 #endif
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1320 {
1321   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1322   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1323   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1324   const PetscInt    *garray = aij->garray;
1325   const PetscScalar *aa,*ba;
1326   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1327   PetscInt          *rowlens;
1328   PetscInt          *colidxs;
1329   PetscScalar       *matvals;
1330   PetscErrorCode    ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1334 
1335   M  = mat->rmap->N;
1336   N  = mat->cmap->N;
1337   m  = mat->rmap->n;
1338   rs = mat->rmap->rstart;
1339   cs = mat->cmap->rstart;
1340   nz = A->nz + B->nz;
1341 
1342   /* write matrix header */
1343   header[0] = MAT_FILE_CLASSID;
1344   header[1] = M; header[2] = N; header[3] = nz;
1345   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1346   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1347 
1348   /* fill in and store row lengths  */
1349   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1350   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1351   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1352   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1353 
1354   /* fill in and store column indices */
1355   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
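       /* merge, for each row, the off-diagonal columns that lie before the diagonal block (garray < cs), then the diagonal-block columns, then the remaining off-diagonal columns, so the global column indices come out in increasing order */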
1356   for (cnt=0, i=0; i<m; i++) {
1357     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1358       if (garray[B->j[jb]] > cs) break;
1359       colidxs[cnt++] = garray[B->j[jb]];
1360     }
1361     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1362       colidxs[cnt++] = A->j[ja] + cs;
1363     for (; jb<B->i[i+1]; jb++)
1364       colidxs[cnt++] = garray[B->j[jb]];
1365   }
1366   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1367   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1368   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1369 
1370   /* fill in and store nonzero values */
1371   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1372   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1373   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
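       /* values are written in the same merged column order as the indices above */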
1374   for (cnt=0, i=0; i<m; i++) {
1375     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1376       if (garray[B->j[jb]] > cs) break;
1377       matvals[cnt++] = ba[jb];
1378     }
1379     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1380       matvals[cnt++] = aa[ja];
1381     for (; jb<B->i[i+1]; jb++)
1382       matvals[cnt++] = ba[jb];
1383   }
1384   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1385   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1386   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1387   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1388   ierr = PetscFree(matvals);CHKERRQ(ierr);
1389 
1390   /* write block size option to the viewer's .info file */
1391   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #include <petscdraw.h>
1396 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1397 {
1398   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1399   PetscErrorCode    ierr;
1400   PetscMPIInt       rank = aij->rank,size = aij->size;
1401   PetscBool         isdraw,iascii,isbinary;
1402   PetscViewer       sviewer;
1403   PetscViewerFormat format;
1404 
1405   PetscFunctionBegin;
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1409   if (iascii) {
1410     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1411     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1412       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
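           /* gather every rank's local nonzero count and report the minimum, average, and maximum over the communicator */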
1413       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1414       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1415       for (i=0; i<(PetscInt)size; i++) {
1416         nmax = PetscMax(nmax,nz[i]);
1417         nmin = PetscMin(nmin,nz[i]);
1418         navg += nz[i];
1419       }
1420       ierr = PetscFree(nz);CHKERRQ(ierr);
1421       navg = navg/size;
1422       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1423       PetscFunctionReturn(0);
1424     }
1425     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1426     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1427       MatInfo   info;
1428       PetscBool inodes;
1429 
1430       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1431       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1432       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1433       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1434       if (!inodes) {
1435         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1436                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1437       } else {
1438         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1439                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1440       }
1441       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1442       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1443       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1445       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1446       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1448       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1449       PetscFunctionReturn(0);
1450     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1451       PetscInt inodecount,inodelimit,*inodes;
1452       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1453       if (inodes) {
1454         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1455       } else {
1456         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1457       }
1458       PetscFunctionReturn(0);
1459     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1460       PetscFunctionReturn(0);
1461     }
1462   } else if (isbinary) {
1463     if (size == 1) {
1464       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1465       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1466     } else {
1467       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1468     }
1469     PetscFunctionReturn(0);
1470   } else if (iascii && size == 1) {
1471     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1472     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1473     PetscFunctionReturn(0);
1474   } else if (isdraw) {
1475     PetscDraw draw;
1476     PetscBool isnull;
1477     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1478     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1479     if (isnull) PetscFunctionReturn(0);
1480   }
1481 
1482   { /* assemble the entire matrix onto first processor */
1483     Mat A = NULL, Av;
1484     IS  isrow,iscol;
1485 
1486     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1487     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1488     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1489     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1490 /*  The commented-out code below uses MatCreateSubMatrices() instead */
1491 /*
1492     Mat *AA, A = NULL, Av;
1493     IS  isrow,iscol;
1494 
1495     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1496     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1497     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1498     if (!rank) {
1499        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1500        A    = AA[0];
1501        Av   = AA[0];
1502     }
1503     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1504 */
1505     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1506     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1507     /*
1508        Every process has to participate in drawing the matrix since the graphics waits are
1509        synchronized across all processes that share the PetscDraw object
1510     */
1511     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1512     if (!rank) {
1513       if (((PetscObject)mat)->name) {
1514         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1515       }
1516       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1517     }
1518     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1519     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1520     ierr = MatDestroy(&A);CHKERRQ(ierr);
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1526 {
1527   PetscErrorCode ierr;
1528   PetscBool      iascii,isdraw,issocket,isbinary;
1529 
1530   PetscFunctionBegin;
1531   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1532   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1533   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1534   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1535   if (iascii || isdraw || isbinary || issocket) {
1536     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1537   }
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1542 {
1543   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1544   PetscErrorCode ierr;
1545   Vec            bb1 = NULL;
1546   PetscBool      hasop;
1547 
1548   PetscFunctionBegin;
1549   if (flag == SOR_APPLY_UPPER) {
1550     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1551     PetscFunctionReturn(0);
1552   }
1553 
1554   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1555     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1556   }
1557 
1558   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1559     if (flag & SOR_ZERO_INITIAL_GUESS) {
1560       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1561       its--;
1562     }
1563 
1564     while (its--) {
1565       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1566       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1567 
1568       /* update rhs: bb1 = bb - B*x */
1569       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1570       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1571 
1572       /* local sweep */
1573       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1574     }
1575   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1576     if (flag & SOR_ZERO_INITIAL_GUESS) {
1577       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1578       its--;
1579     }
1580     while (its--) {
1581       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583 
1584       /* update rhs: bb1 = bb - B*x */
1585       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1586       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1587 
1588       /* local sweep */
1589       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1590     }
1591   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1592     if (flag & SOR_ZERO_INITIAL_GUESS) {
1593       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1594       its--;
1595     }
1596     while (its--) {
1597       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599 
1600       /* update rhs: bb1 = bb - B*x */
1601       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1602       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1603 
1604       /* local sweep */
1605       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1606     }
1607   } else if (flag & SOR_EISENSTAT) {
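         /* Eisenstat trick: do a local backward sweep, then form the modified right-hand side
            bb1 = bb + ((omega-2)/omega)*D*xx + B*xx_ghost, and finish with a local forward sweep
            whose result is added to xx */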
1608     Vec xx1;
1609 
1610     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1611     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1612 
1613     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1615     if (!mat->diag) {
1616       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1617       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1618     }
1619     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1620     if (hasop) {
1621       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1622     } else {
1623       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1624     }
1625     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1626 
1627     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1628 
1629     /* local sweep */
1630     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1631     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1632     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1633   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1634 
1635   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1636 
1637   matin->factorerrortype = mat->A->factorerrortype;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1642 {
1643   Mat            aA,aB,Aperm;
1644   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1645   PetscScalar    *aa,*ba;
1646   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1647   PetscSF        rowsf,sf;
1648   IS             parcolp = NULL;
1649   PetscBool      done;
1650   PetscErrorCode ierr;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1654   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1655   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1656   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1657 
1658   /* Invert row permutation to find out where my rows should go */
1659   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1660   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1661   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1663   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1664   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
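       /* rdest[k] now holds the new global row index for local row k; cdest is filled analogously for the columns below */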
1665 
1666   /* Invert column permutation to find out where my columns should go */
1667   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1668   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1669   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1670   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1671   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1672   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1674 
1675   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1676   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1677   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1678 
1679   /* Find out where my gcols should go */
1680   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1681   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1682   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1683   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1684   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1685   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1686   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1687   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1688 
1689   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1690   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1691   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1692   for (i=0; i<m; i++) {
1693     PetscInt    row = rdest[i];
1694     PetscMPIInt rowner;
1695     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1696     for (j=ai[i]; j<ai[i+1]; j++) {
1697       PetscInt    col = cdest[aj[j]];
1698       PetscMPIInt cowner;
1699       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1700       if (rowner == cowner) dnnz[i]++;
1701       else onnz[i]++;
1702     }
1703     for (j=bi[i]; j<bi[i+1]; j++) {
1704       PetscInt    col = gcdest[bj[j]];
1705       PetscMPIInt cowner;
1706       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1707       if (rowner == cowner) dnnz[i]++;
1708       else onnz[i]++;
1709     }
1710   }
1711   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1712   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1713   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1714   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1715   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1716 
1717   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1718   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1719   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1720   for (i=0; i<m; i++) {
1721     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1722     PetscInt j0,rowlen;
1723     rowlen = ai[i+1] - ai[i];
1724     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed acols/bcols buffers, so insert the row in batches of at most m entries */
1725       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1726       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1727     }
1728     rowlen = bi[i+1] - bi[i];
1729     for (j0=j=0; j<rowlen; j0=j) {
1730       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1731       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1732     }
1733   }
1734   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1735   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1737   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1738   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1739   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1740   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1741   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1742   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1743   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1744   *B = Aperm;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1749 {
1750   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1755   if (ghosts) *ghosts = aij->garray;
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1760 {
1761   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1762   Mat            A    = mat->A,B = mat->B;
1763   PetscErrorCode ierr;
1764   PetscLogDouble isend[5],irecv[5];
1765 
1766   PetscFunctionBegin;
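       /* gather local info from the diagonal block A and the off-diagonal block B, then keep it local or reduce it (max or sum) over the communicator as requested */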
1767   info->block_size = 1.0;
1768   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1769 
1770   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1771   isend[3] = info->memory;  isend[4] = info->mallocs;
1772 
1773   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1774 
1775   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1776   isend[3] += info->memory;  isend[4] += info->mallocs;
1777   if (flag == MAT_LOCAL) {
1778     info->nz_used      = isend[0];
1779     info->nz_allocated = isend[1];
1780     info->nz_unneeded  = isend[2];
1781     info->memory       = isend[3];
1782     info->mallocs      = isend[4];
1783   } else if (flag == MAT_GLOBAL_MAX) {
1784     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1785 
1786     info->nz_used      = irecv[0];
1787     info->nz_allocated = irecv[1];
1788     info->nz_unneeded  = irecv[2];
1789     info->memory       = irecv[3];
1790     info->mallocs      = irecv[4];
1791   } else if (flag == MAT_GLOBAL_SUM) {
1792     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1793 
1794     info->nz_used      = irecv[0];
1795     info->nz_allocated = irecv[1];
1796     info->nz_unneeded  = irecv[2];
1797     info->memory       = irecv[3];
1798     info->mallocs      = irecv[4];
1799   }
1800   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1801   info->fill_ratio_needed = 0;
1802   info->factor_mallocs    = 0;
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1807 {
1808   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1809   PetscErrorCode ierr;
1810 
1811   PetscFunctionBegin;
1812   switch (op) {
1813   case MAT_NEW_NONZERO_LOCATIONS:
1814   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1815   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1816   case MAT_KEEP_NONZERO_PATTERN:
1817   case MAT_NEW_NONZERO_LOCATION_ERR:
1818   case MAT_USE_INODES:
1819   case MAT_IGNORE_ZERO_ENTRIES:
1820     MatCheckPreallocated(A,1);
1821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1822     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1823     break;
1824   case MAT_ROW_ORIENTED:
1825     MatCheckPreallocated(A,1);
1826     a->roworiented = flg;
1827 
1828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1829     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1830     break;
1831   case MAT_FORCE_DIAGONAL_ENTRIES:
1832   case MAT_SORTED_FULL:
1833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1834     break;
1835   case MAT_IGNORE_OFF_PROC_ENTRIES:
1836     a->donotstash = flg;
1837     break;
1838   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1839   case MAT_SPD:
1840   case MAT_SYMMETRIC:
1841   case MAT_STRUCTURALLY_SYMMETRIC:
1842   case MAT_HERMITIAN:
1843   case MAT_SYMMETRY_ETERNAL:
1844     break;
1845   case MAT_SUBMAT_SINGLEIS:
1846     A->submat_singleis = flg;
1847     break;
1848   case MAT_STRUCTURE_ONLY:
1849     /* The option is handled directly by MatSetOption() */
1850     break;
1851   default:
1852     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1853   }
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1858 {
1859   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1860   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1861   PetscErrorCode ierr;
1862   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1863   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1864   PetscInt       *cmap,*idx_p;
1865 
1866   PetscFunctionBegin;
1867   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1868   mat->getrowactive = PETSC_TRUE;
1869 
1870   if (!mat->rowvalues && (idx || v)) {
1871     /*
1872         allocate enough space to hold information from the longest row.
1873     */
1874     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1875     PetscInt   max = 1,tmp;
1876     for (i=0; i<matin->rmap->n; i++) {
1877       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1878       if (max < tmp) max = tmp;
1879     }
1880     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1881   }
1882 
1883   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1884   lrow = row - rstart;
1885 
1886   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1887   if (!v)   {pvA = NULL; pvB = NULL;}
1888   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1889   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1890   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1891   nztot = nzA + nzB;
1892 
1893   cmap = mat->garray;
1894   if (v  || idx) {
1895     if (nztot) {
1896       /* Sort by increasing column numbers, assuming A and B already sorted */
1897       PetscInt imark = -1;
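           /* imark counts how many of this row's off-diagonal columns fall before the diagonal block */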
1898       if (v) {
1899         *v = v_p = mat->rowvalues;
1900         for (i=0; i<nzB; i++) {
1901           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1902           else break;
1903         }
1904         imark = i;
1905         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1906         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1907       }
1908       if (idx) {
1909         *idx = idx_p = mat->rowindices;
1910         if (imark > -1) {
1911           for (i=0; i<imark; i++) {
1912             idx_p[i] = cmap[cworkB[i]];
1913           }
1914         } else {
1915           for (i=0; i<nzB; i++) {
1916             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1917             else break;
1918           }
1919           imark = i;
1920         }
1921         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1922         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1923       }
1924     } else {
1925       if (idx) *idx = NULL;
1926       if (v)   *v   = NULL;
1927     }
1928   }
1929   *nz  = nztot;
1930   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1931   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1936 {
1937   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1938 
1939   PetscFunctionBegin;
1940   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1941   aij->getrowactive = PETSC_FALSE;
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1946 {
1947   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1948   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1949   PetscErrorCode ierr;
1950   PetscInt       i,j,cstart = mat->cmap->rstart;
1951   PetscReal      sum = 0.0;
1952   MatScalar      *v;
1953 
1954   PetscFunctionBegin;
1955   if (aij->size == 1) {
1956     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1957   } else {
1958     if (type == NORM_FROBENIUS) {
1959       v = amat->a;
1960       for (i=0; i<amat->nz; i++) {
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962       }
1963       v = bmat->a;
1964       for (i=0; i<bmat->nz; i++) {
1965         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1966       }
1967       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1968       *norm = PetscSqrtReal(*norm);
1969       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1970     } else if (type == NORM_1) { /* max column norm */
1971       PetscReal *tmp,*tmp2;
1972       PetscInt  *jj,*garray = aij->garray;
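           /* accumulate the absolute column sums into a global-length array, Allreduce them, and take the largest entry */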
1973       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1974       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1975       *norm = 0.0;
1976       v     = amat->a; jj = amat->j;
1977       for (j=0; j<amat->nz; j++) {
1978         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1979       }
1980       v = bmat->a; jj = bmat->j;
1981       for (j=0; j<bmat->nz; j++) {
1982         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1983       }
1984       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1985       for (j=0; j<mat->cmap->N; j++) {
1986         if (tmp2[j] > *norm) *norm = tmp2[j];
1987       }
1988       ierr = PetscFree(tmp);CHKERRQ(ierr);
1989       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1990       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1991     } else if (type == NORM_INFINITY) { /* max row norm */
1992       PetscReal ntemp = 0.0;
1993       for (j=0; j<aij->A->rmap->n; j++) {
1994         v   = amat->a + amat->i[j];
1995         sum = 0.0;
1996         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1997           sum += PetscAbsScalar(*v); v++;
1998         }
1999         v = bmat->a + bmat->i[j];
2000         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         if (sum > ntemp) ntemp = sum;
2004       }
2005       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2006       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2007     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2013 {
2014   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2015   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2016   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2017   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2018   PetscErrorCode  ierr;
2019   Mat             B,A_diag,*B_diag;
2020   const MatScalar *array;
2021 
2022   PetscFunctionBegin;
2023   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2024   ai = Aloc->i; aj = Aloc->j;
2025   bi = Bloc->i; bj = Bloc->j;
2026   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2027     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2028     PetscSFNode          *oloc;
2029     PETSC_UNUSED PetscSF sf;
2030 
2031     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2032     /* compute d_nnz for preallocation */
2033     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2034     for (i=0; i<ai[ma]; i++) {
2035       d_nnz[aj[i]]++;
2036     }
2037     /* compute local off-diagonal contributions */
2038     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2039     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2040     /* map those to global */
2041     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2042     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2043     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2044     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2045     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2046     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2047     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2048 
2049     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2050     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2051     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2052     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2053     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2054     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2055   } else {
2056     B    = *matout;
2057     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2058   }
2059 
2060   b           = (Mat_MPIAIJ*)B->data;
2061   A_diag      = a->A;
2062   B_diag      = &b->A;
2063   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2064   A_diag_ncol = A_diag->cmap->N;
2065   B_diag_ilen = sub_B_diag->ilen;
2066   B_diag_i    = sub_B_diag->i;
2067 
2068   /* Set ilen for diagonal of B */
2069   for (i=0; i<A_diag_ncol; i++) {
2070     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2071   }
2072 
2073   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2074   very quickly (i.e., without using MatSetValues) because all writes are local. */
2075   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2076 
2077   /* copy over the B part */
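       /* local row i of the off-diagonal block contributes to global column rstart+i of the transpose, so each row is inserted as a single column of B via MatSetValues() */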
2078   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2079   array = Bloc->a;
2080   row   = A->rmap->rstart;
2081   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2082   cols_tmp = cols;
2083   for (i=0; i<mb; i++) {
2084     ncol = bi[i+1]-bi[i];
2085     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2086     row++;
2087     array += ncol; cols_tmp += ncol;
2088   }
2089   ierr = PetscFree(cols);CHKERRQ(ierr);
2090 
2091   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2093   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2094     *matout = B;
2095   } else {
2096     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2097   }
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2102 {
2103   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2104   Mat            a    = aij->A,b = aij->B;
2105   PetscErrorCode ierr;
2106   PetscInt       s1,s2,s3;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2110   if (rr) {
2111     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2112     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2113     /* Overlap communication with computation. */
2114     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115   }
2116   if (ll) {
2117     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2118     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2119     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2120   }
2121   /* scale  the diagonal block */
2122   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2123 
2124   if (rr) {
2125     /* Do a scatter end and then right scale the off-diagonal block */
2126     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2127     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2143 {
2144   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2145   Mat            a,b,c,d;
2146   PetscBool      flg;
2147   PetscErrorCode ierr;
2148 
2149   PetscFunctionBegin;
2150   a = matA->A; b = matA->B;
2151   c = matB->A; d = matB->B;
2152 
2153   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2154   if (flg) {
2155     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2156   }
2157   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2162 {
2163   PetscErrorCode ierr;
2164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2166 
2167   PetscFunctionBegin;
2168   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2169   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2170     /* because of the column compression in the off-processor part of the matrix a->B,
2171        the number of columns in a->B and b->B may be different, hence we cannot call
2172        MatCopy() directly on the two parts. If need be, a more efficient copy than
2173        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2174        then copying the submatrices */
2175     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2176   } else {
2177     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2178     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2179   }
2180   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2185 {
2186   PetscErrorCode ierr;
2187 
2188   PetscFunctionBegin;
2189   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /*
2194    Computes the number of nonzeros per row needed for preallocation when X and Y
2195    have different nonzero structure.
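        The count for each row is the size of the union of X's and Y's column lists, compared in global numbering through xltog and yltog.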
2196 */
2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2198 {
2199   PetscInt       i,j,k,nzx,nzy;
2200 
2201   PetscFunctionBegin;
2202   /* Set the number of nonzeros in the new matrix */
2203   for (i=0; i<m; i++) {
2204     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2205     nzx = xi[i+1] - xi[i];
2206     nzy = yi[i+1] - yi[i];
2207     nnz[i] = 0;
2208     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2209       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2210       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2211       nnz[i]++;
2212     }
2213     for (; k<nzy; k++) nnz[i]++;
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2220 {
2221   PetscErrorCode ierr;
2222   PetscInt       m = Y->rmap->N;
2223   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2224   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2225 
2226   PetscFunctionBegin;
2227   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2232 {
2233   PetscErrorCode ierr;
2234   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2235 
2236   PetscFunctionBegin;
2237   if (str == SAME_NONZERO_PATTERN) {
2238     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2239     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2240   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2241     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2242   } else {
2243     Mat      B;
2244     PetscInt *nnz_d,*nnz_o;
2245 
2246     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2251     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2266 {
2267 #if defined(PETSC_USE_COMPLEX)
2268   PetscErrorCode ierr;
2269   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2270 
2271   PetscFunctionBegin;
2272   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2273   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2274 #else
2275   PetscFunctionBegin;
2276 #endif
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2292 {
2293   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2298   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2303 {
2304   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2305   PetscErrorCode    ierr;
2306   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2307   PetscScalar       *va,*vv;
2308   Vec               vB,vA;
2309   const PetscScalar *vb;
2310 
2311   PetscFunctionBegin;
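       /* find the largest-magnitude entry of each row in the diagonal and off-diagonal blocks separately, then merge row by row, mapping off-diagonal column indices to global numbering through garray */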
2312   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2313   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2314 
2315   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2316   if (idx) {
2317     for (i=0; i<m; i++) {
2318       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2319     }
2320   }
2321 
2322   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2323   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2324   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2325 
2326   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2327   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2328   for (i=0; i<m; i++) {
2329     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2330       vv[i] = vb[i];
2331       if (idx) idx[i] = a->garray[idxb[i]];
2332     } else {
2333       vv[i] = va[i];
2334       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2335         idx[i] = a->garray[idxb[i]];
2336     }
2337   }
2338   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2339   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2340   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2341   ierr = PetscFree(idxb);CHKERRQ(ierr);
2342   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2343   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2348 {
2349   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2350   PetscInt       m = A->rmap->n,n = A->cmap->n;
2351   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2352   PetscInt       *cmap  = mat->garray;
2353   PetscInt       *diagIdx, *offdiagIdx;
2354   Vec            diagV, offdiagV;
2355   PetscScalar    *a, *diagA, *offdiagA, *ba;
2356   PetscInt       r,j,col,ncols,*bi,*bj;
2357   PetscErrorCode ierr;
2358   Mat            B = mat->B;
2359   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2360 
2361   PetscFunctionBegin;
2362   /* When a single process holds the entire matrix A and the other processes have no entries */
2363   if (A->cmap->N == n) {
2364     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2365     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2366     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2367     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2368     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2369     PetscFunctionReturn(0);
2370   } else if (n == 0) {
2371     if (m) {
2372       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2373       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2374       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2375     }
2376     PetscFunctionReturn(0);
2377   }
2378 
2379   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2380   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2381   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2382   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2383 
2384   /* Get offdiagIdx[] for implicit 0.0 */
2385   ba = b->a;
2386   bi = b->i;
2387   bj = b->j;
2388   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2389   for (r = 0; r < m; r++) {
2390     ncols = bi[r+1] - bi[r];
2391     if (ncols == A->cmap->N - n) { /* Brow is dense */
2392       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2393     } else { /* Brow is sparse, so the off-diagonal minimum in absolute value is the implicit 0.0 */
2394       offdiagA[r] = 0.0;
2395 
2396       /* Find first hole in the cmap */
2397       for (j=0; j<ncols; j++) {
2398         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2399         if (col > j && j < cstart) {
2400           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2401           break;
2402         } else if (col > j + n && j >= cstart) {
2403           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2404           break;
2405         }
2406       }
2407       if (j == ncols && ncols < A->cmap->N - n) {
2408         /* a hole is outside compressed Bcols */
2409         if (ncols == 0) {
2410           if (cstart) {
2411             offdiagIdx[r] = 0;
2412           } else offdiagIdx[r] = cend;
2413         } else { /* ncols > 0 */
2414           offdiagIdx[r] = cmap[ncols-1] + 1;
2415           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2416         }
2417       }
2418     }
2419 
2420     for (j=0; j<ncols; j++) {
2421       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2422       ba++; bj++;
2423     }
2424   }
2425 
2426   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2427   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2428   for (r = 0; r < m; ++r) {
2429     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2430       a[r]   = diagA[r];
2431       if (idx) idx[r] = cstart + diagIdx[r];
2432     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2433       a[r] = diagA[r];
2434       if (idx) {
2435         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2436           idx[r] = cstart + diagIdx[r];
2437         } else idx[r] = offdiagIdx[r];
2438       }
2439     } else {
2440       a[r]   = offdiagA[r];
2441       if (idx) idx[r] = offdiagIdx[r];
2442     }
2443   }
2444   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2445   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2446   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2447   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2454 {
2455   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2456   PetscInt       m = A->rmap->n,n = A->cmap->n;
2457   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2458   PetscInt       *cmap  = mat->garray;
2459   PetscInt       *diagIdx, *offdiagIdx;
2460   Vec            diagV, offdiagV;
2461   PetscScalar    *a, *diagA, *offdiagA, *ba;
2462   PetscInt       r,j,col,ncols,*bi,*bj;
2463   PetscErrorCode ierr;
2464   Mat            B = mat->B;
2465   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2466 
2467   PetscFunctionBegin;
2468   /* When a single process holds the entire matrix A and the other processes have no entries */
2469   if (A->cmap->N == n) {
2470     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2471     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2472     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2473     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2474     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2475     PetscFunctionReturn(0);
2476   } else if (n == 0) {
2477     if (m) {
2478       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2479       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2480       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2481     }
2482     PetscFunctionReturn(0);
2483   }
2484 
2485   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2488   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   ba = b->a;
2492   bi = b->i;
2493   bj = b->j;
2494   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r+1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2499     } else { /* Brow is sparse, so we already know the minimum is 0.0 or lower (there is an implicit 0.0) */
2500       offdiagA[r] = 0.0;
2501 
2502       /* Find first hole in the cmap */
2503       for (j=0; j<ncols; j++) {
2504         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2505         if (col > j && j < cstart) {
2506           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2507           break;
2508         } else if (col > j + n && j >= cstart) {
2509           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2510           break;
2511         }
2512       }
2513       if (j == ncols && ncols < A->cmap->N - n) {
2514         /* a hole is outside compressed Bcols */
2515         if (ncols == 0) {
2516           if (cstart) {
2517             offdiagIdx[r] = 0;
2518           } else offdiagIdx[r] = cend;
2519         } else { /* ncols > 0 */
2520           offdiagIdx[r] = cmap[ncols-1] + 1;
2521           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2522         }
2523       }
2524     }
2525 
2526     for (j=0; j<ncols; j++) {
2527       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2528       ba++; bj++;
2529     }
2530   }
2531 
2532   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2533   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2534   for (r = 0; r < m; ++r) {
2535     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2536       a[r]   = diagA[r];
2537       if (idx) idx[r] = cstart + diagIdx[r];
2538     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2539       a[r] = diagA[r];
2540       if (idx) {
2541         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2542           idx[r] = cstart + diagIdx[r];
2543         } else idx[r] = offdiagIdx[r];
2544       }
2545     } else {
2546       a[r]   = offdiagA[r];
2547       if (idx) idx[r] = offdiagIdx[r];
2548     }
2549   }
2550   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2551   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2552   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2553   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2554   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2555   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2560 {
2561   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2562   PetscInt       m = A->rmap->n,n = A->cmap->n;
2563   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2564   PetscInt       *cmap  = mat->garray;
2565   PetscInt       *diagIdx, *offdiagIdx;
2566   Vec            diagV, offdiagV;
2567   PetscScalar    *a, *diagA, *offdiagA, *ba;
2568   PetscInt       r,j,col,ncols,*bi,*bj;
2569   PetscErrorCode ierr;
2570   Mat            B = mat->B;
2571   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2572 
2573   PetscFunctionBegin;
2574   /* When one process holds the entire A and the other processes have no entries */
2575   if (A->cmap->N == n) {
2576     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2577     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2578     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2579     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2580     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2581     PetscFunctionReturn(0);
2582   } else if (n == 0) {
2583     if (m) {
2584       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2585       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2586       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2587     }
2588     PetscFunctionReturn(0);
2589   }
2590 
2591   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2592   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2593   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2594   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2595 
2596   /* Get offdiagIdx[] for implicit 0.0 */
2597   ba = b->a;
2598   bi = b->i;
2599   bj = b->j;
2600   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2601   for (r = 0; r < m; r++) {
2602     ncols = bi[r+1] - bi[r];
2603     if (ncols == A->cmap->N - n) { /* Brow is dense */
2604       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2605     } else { /* Brow is sparse so we already KNOW the maximum is 0.0 or higher */
2606       offdiagA[r] = 0.0;
2607 
2608       /* Find first hole in the cmap */
2609       for (j=0; j<ncols; j++) {
2610         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2611         if (col > j && j < cstart) {
2612           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2613           break;
2614         } else if (col > j + n && j >= cstart) {
2615           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2616           break;
2617         }
2618       }
2619       if (j == ncols && ncols < A->cmap->N - n) {
2620         /* a hole is outside compressed Bcols */
2621         if (ncols == 0) {
2622           if (cstart) {
2623             offdiagIdx[r] = 0;
2624           } else offdiagIdx[r] = cend;
2625         } else { /* ncols > 0 */
2626           offdiagIdx[r] = cmap[ncols-1] + 1;
2627           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2628         }
2629       }
2630     }
2631 
2632     for (j=0; j<ncols; j++) {
2633       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2634       ba++; bj++;
2635     }
2636   }
2637 
2638   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2639   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2640   for (r = 0; r < m; ++r) {
2641     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2642       a[r] = diagA[r];
2643       if (idx) idx[r] = cstart + diagIdx[r];
2644     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2645       a[r] = diagA[r];
2646       if (idx) {
2647         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2648           idx[r] = cstart + diagIdx[r];
2649         } else idx[r] = offdiagIdx[r];
2650       }
2651     } else {
2652       a[r] = offdiagA[r];
2653       if (idx) idx[r] = offdiagIdx[r];
2654     }
2655   }
2656   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2657   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2658   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2659   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2660   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2661   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2666 {
2667   PetscErrorCode ierr;
2668   Mat            *dummy;
2669 
2670   PetscFunctionBegin;
2671   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2672   *newmat = *dummy;
2673   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2678 {
2679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2680   PetscErrorCode ierr;
2681 
2682   PetscFunctionBegin;
2683   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2684   A->factorerrortype = a->A->factorerrortype;
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2689 {
2690   PetscErrorCode ierr;
2691   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2692 
2693   PetscFunctionBegin;
2694   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2695   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2696   if (x->assembled) {
2697     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2698   } else {
2699     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2700   }
2701   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2707 {
2708   PetscFunctionBegin;
2709   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2710   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 /*@
2715    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2716 
2717    Collective on Mat
2718 
2719    Input Parameters:
2720 +    A - the matrix
2721 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is not to use it)
2722 
2723  Level: advanced
2724 
2725 @*/
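/*
   Example usage (a minimal sketch; A is assumed to be an existing MATMPIAIJ matrix):

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   The same choice can also be made at runtime with the options database key
   -mat_increase_overlap_scalable, handled in MatSetFromOptions_MPIAIJ() below.
*/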
2726 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2727 {
2728   PetscErrorCode       ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2736 {
2737   PetscErrorCode       ierr;
2738   PetscBool            sc = PETSC_FALSE,flg;
2739 
2740   PetscFunctionBegin;
2741   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2742   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2743   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2744   if (flg) {
2745     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2746   }
2747   ierr = PetscOptionsTail();CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2752 {
2753   PetscErrorCode ierr;
2754   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2755   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2756 
2757   PetscFunctionBegin;
2758   if (!Y->preallocated) {
2759     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2760   } else if (!aij->nz) {
2761     PetscInt nonew = aij->nonew;
2762     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2763     aij->nonew = nonew;
2764   }
2765   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2770 {
2771   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2776   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2777   if (d) {
2778     PetscInt rstart;
2779     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2780     *d += rstart;
2781 
2782   }
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2787 {
2788   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 /* -------------------------------------------------------------------*/
2797 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2798                                        MatGetRow_MPIAIJ,
2799                                        MatRestoreRow_MPIAIJ,
2800                                        MatMult_MPIAIJ,
2801                                 /* 4*/ MatMultAdd_MPIAIJ,
2802                                        MatMultTranspose_MPIAIJ,
2803                                        MatMultTransposeAdd_MPIAIJ,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                 /*10*/ NULL,
2808                                        NULL,
2809                                        NULL,
2810                                        MatSOR_MPIAIJ,
2811                                        MatTranspose_MPIAIJ,
2812                                 /*15*/ MatGetInfo_MPIAIJ,
2813                                        MatEqual_MPIAIJ,
2814                                        MatGetDiagonal_MPIAIJ,
2815                                        MatDiagonalScale_MPIAIJ,
2816                                        MatNorm_MPIAIJ,
2817                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2818                                        MatAssemblyEnd_MPIAIJ,
2819                                        MatSetOption_MPIAIJ,
2820                                        MatZeroEntries_MPIAIJ,
2821                                 /*24*/ MatZeroRows_MPIAIJ,
2822                                        NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        NULL,
2826                                 /*29*/ MatSetUp_MPIAIJ,
2827                                        NULL,
2828                                        NULL,
2829                                        MatGetDiagonalBlock_MPIAIJ,
2830                                        NULL,
2831                                 /*34*/ MatDuplicate_MPIAIJ,
2832                                        NULL,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                 /*39*/ MatAXPY_MPIAIJ,
2837                                        MatCreateSubMatrices_MPIAIJ,
2838                                        MatIncreaseOverlap_MPIAIJ,
2839                                        MatGetValues_MPIAIJ,
2840                                        MatCopy_MPIAIJ,
2841                                 /*44*/ MatGetRowMax_MPIAIJ,
2842                                        MatScale_MPIAIJ,
2843                                        MatShift_MPIAIJ,
2844                                        MatDiagonalSet_MPIAIJ,
2845                                        MatZeroRowsColumns_MPIAIJ,
2846                                 /*49*/ MatSetRandom_MPIAIJ,
2847                                        NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2852                                        NULL,
2853                                        MatSetUnfactored_MPIAIJ,
2854                                        MatPermute_MPIAIJ,
2855                                        NULL,
2856                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2857                                        MatDestroy_MPIAIJ,
2858                                        MatView_MPIAIJ,
2859                                        NULL,
2860                                        NULL,
2861                                 /*64*/ NULL,
2862                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2863                                        NULL,
2864                                        NULL,
2865                                        NULL,
2866                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2867                                        MatGetRowMinAbs_MPIAIJ,
2868                                        NULL,
2869                                        NULL,
2870                                        NULL,
2871                                        NULL,
2872                                 /*75*/ MatFDColoringApply_AIJ,
2873                                        MatSetFromOptions_MPIAIJ,
2874                                        NULL,
2875                                        NULL,
2876                                        MatFindZeroDiagonals_MPIAIJ,
2877                                 /*80*/ NULL,
2878                                        NULL,
2879                                        NULL,
2880                                 /*83*/ MatLoad_MPIAIJ,
2881                                        MatIsSymmetric_MPIAIJ,
2882                                        NULL,
2883                                        NULL,
2884                                        NULL,
2885                                        NULL,
2886                                 /*89*/ NULL,
2887                                        NULL,
2888                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2889                                        NULL,
2890                                        NULL,
2891                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2892                                        NULL,
2893                                        NULL,
2894                                        NULL,
2895                                        MatBindToCPU_MPIAIJ,
2896                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2897                                        NULL,
2898                                        NULL,
2899                                        MatConjugate_MPIAIJ,
2900                                        NULL,
2901                                 /*104*/MatSetValuesRow_MPIAIJ,
2902                                        MatRealPart_MPIAIJ,
2903                                        MatImaginaryPart_MPIAIJ,
2904                                        NULL,
2905                                        NULL,
2906                                 /*109*/NULL,
2907                                        NULL,
2908                                        MatGetRowMin_MPIAIJ,
2909                                        NULL,
2910                                        MatMissingDiagonal_MPIAIJ,
2911                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2912                                        NULL,
2913                                        MatGetGhosts_MPIAIJ,
2914                                        NULL,
2915                                        NULL,
2916                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2917                                        NULL,
2918                                        NULL,
2919                                        NULL,
2920                                        MatGetMultiProcBlock_MPIAIJ,
2921                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2922                                        MatGetColumnNorms_MPIAIJ,
2923                                        MatInvertBlockDiagonal_MPIAIJ,
2924                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2925                                        MatCreateSubMatricesMPI_MPIAIJ,
2926                                 /*129*/NULL,
2927                                        NULL,
2928                                        NULL,
2929                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2930                                        NULL,
2931                                 /*134*/NULL,
2932                                        NULL,
2933                                        NULL,
2934                                        NULL,
2935                                        NULL,
2936                                 /*139*/MatSetBlockSizes_MPIAIJ,
2937                                        NULL,
2938                                        NULL,
2939                                        MatFDColoringSetUp_MPIXAIJ,
2940                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2941                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2942                                 /*145*/NULL,
2943                                        NULL,
2944                                        NULL
2945 };
2946 
2947 /* ----------------------------------------------------------------------------------------*/
2948 
2949 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2950 {
2951   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2956   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2957   PetscFunctionReturn(0);
2958 }
2959 
2960 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2961 {
2962   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2963   PetscErrorCode ierr;
2964 
2965   PetscFunctionBegin;
2966   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2967   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2972 {
2973   Mat_MPIAIJ     *b;
2974   PetscErrorCode ierr;
2975   PetscMPIInt    size;
2976 
2977   PetscFunctionBegin;
2978   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2979   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2980   b = (Mat_MPIAIJ*)B->data;
2981 
2982 #if defined(PETSC_USE_CTABLE)
2983   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2984 #else
2985   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2986 #endif
2987   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2988   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2989   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2990 
2991   /* Because the B will have been resized we simply destroy it and create a new one each time */
2992   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2993   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2994   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2995   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2996   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2997   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2998   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2999 
3000   if (!B->preallocated) {
3001     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3002     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3003     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3004     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3005     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3006   }
3007 
3008   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3009   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3010   B->preallocated  = PETSC_TRUE;
3011   B->was_assembled = PETSC_FALSE;
3012   B->assembled     = PETSC_FALSE;
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3017 {
3018   Mat_MPIAIJ     *b;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3023   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3025   b = (Mat_MPIAIJ*)B->data;
3026 
3027 #if defined(PETSC_USE_CTABLE)
3028   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3029 #else
3030   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3031 #endif
3032   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3033   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3034   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3035 
3036   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3037   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3038   B->preallocated  = PETSC_TRUE;
3039   B->was_assembled = PETSC_FALSE;
3040   B->assembled = PETSC_FALSE;
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3045 {
3046   Mat            mat;
3047   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3048   PetscErrorCode ierr;
3049 
3050   PetscFunctionBegin;
3051   *newmat = NULL;
3052   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3053   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3054   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3055   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3056   a       = (Mat_MPIAIJ*)mat->data;
3057 
3058   mat->factortype   = matin->factortype;
3059   mat->assembled    = matin->assembled;
3060   mat->insertmode   = NOT_SET_VALUES;
3061   mat->preallocated = matin->preallocated;
3062 
3063   a->size         = oldmat->size;
3064   a->rank         = oldmat->rank;
3065   a->donotstash   = oldmat->donotstash;
3066   a->roworiented  = oldmat->roworiented;
3067   a->rowindices   = NULL;
3068   a->rowvalues    = NULL;
3069   a->getrowactive = PETSC_FALSE;
3070 
3071   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3072   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3073 
3074   if (oldmat->colmap) {
3075 #if defined(PETSC_USE_CTABLE)
3076     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3077 #else
3078     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3079     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3080     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3081 #endif
3082   } else a->colmap = NULL;
3083   if (oldmat->garray) {
3084     PetscInt len;
3085     len  = oldmat->B->cmap->n;
3086     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3087     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3088     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3089   } else a->garray = NULL;
3090 
3091   /* MatDuplicate() may be called with a non-assembled matrix; it only requires
3092      the matrix to be preallocated. This can happen, for example, inside a
3093      DMCreateMatrix_Shell */
3094   if (oldmat->lvec) {
3095     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3096     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3097   }
3098   if (oldmat->Mvctx) {
3099     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3100     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3101   }
3102   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3103   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3104   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3105   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3106   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3107   *newmat = mat;
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3112 {
3113   PetscBool      isbinary, ishdf5;
3114   PetscErrorCode ierr;
3115 
3116   PetscFunctionBegin;
3117   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3118   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3119   /* force binary viewer to load .info file if it has not yet done so */
3120   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3121   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3122   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3123   if (isbinary) {
3124     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3125   } else if (ishdf5) {
3126 #if defined(PETSC_HAVE_HDF5)
3127     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3128 #else
3129     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3130 #endif
3131   } else {
3132     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3133   }
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3138 {
3139   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3140   PetscInt       *rowidxs,*colidxs;
3141   PetscScalar    *matvals;
3142   PetscErrorCode ierr;
3143 
3144   PetscFunctionBegin;
3145   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3146 
3147   /* read in matrix header */
3148   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3149   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3150   M  = header[1]; N = header[2]; nz = header[3];
3151   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3152   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3153   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3154 
3155   /* set block sizes from the viewer's .info file */
3156   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3157   /* set global sizes if not set already */
3158   if (mat->rmap->N < 0) mat->rmap->N = M;
3159   if (mat->cmap->N < 0) mat->cmap->N = N;
3160   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3161   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3162 
3163   /* check if the matrix sizes are correct */
3164   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3165   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3166 
3167   /* read in row lengths and build row indices */
3168   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3169   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3170   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3171   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3172   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3173   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3174   /* read in column indices and matrix values */
3175   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3176   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3177   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3178   /* store matrix indices and values */
3179   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3180   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3181   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3182   PetscFunctionReturn(0);
3183 }
3184 
3185 /* Not scalable because of ISAllGather() unless getting all columns. */
3186 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3187 {
3188   PetscErrorCode ierr;
3189   IS             iscol_local;
3190   PetscBool      isstride;
3191   PetscMPIInt    lisstride=0,gisstride;
3192 
3193   PetscFunctionBegin;
3194   /* check if we are grabbing all columns */
3195   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3196 
3197   if (isstride) {
3198     PetscInt  start,len,mstart,mlen;
3199     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3200     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3201     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3202     if (mstart == start && mlen-mstart == len) lisstride = 1;
3203   }
3204 
3205   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3206   if (gisstride) {
3207     PetscInt N;
3208     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3209     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3210     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3211     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3212   } else {
3213     PetscInt cbs;
3214     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3215     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3216     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3217   }
3218 
3219   *isseq = iscol_local;
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 /*
3224  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3225  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3226 
3227  Input Parameters:
3228    mat - matrix
3229    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3230            i.e., mat->rstart <= isrow[i] < mat->rend
3231    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3232            i.e., mat->cstart <= iscol[i] < mat->cend
3233  Output Parameters:
3234    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3235    iscol_o - sequential column index set for retrieving mat->B
3236    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3237  */
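/*
   A minimal calling sketch; it mirrors the MAT_INITIAL_MATRIX branch of
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() below (variable names are illustrative):

     const PetscInt *garray;
     IS             isrow_d,iscol_d,iscol_o;
     Mat            Asub,Bsub,M;

     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
     ierr = PetscFree(garray);CHKERRQ(ierr);
*/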
3238 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3239 {
3240   PetscErrorCode ierr;
3241   Vec            x,cmap;
3242   const PetscInt *is_idx;
3243   PetscScalar    *xarray,*cmaparray;
3244   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3245   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3246   Mat            B=a->B;
3247   Vec            lvec=a->lvec,lcmap;
3248   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3249   MPI_Comm       comm;
3250   VecScatter     Mvctx=a->Mvctx;
3251 
3252   PetscFunctionBegin;
3253   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3254   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3255 
3256   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3257   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3258   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3259   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3260   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3261 
3262   /* Get start indices */
3263   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3264   isstart -= ncols;
3265   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3266 
3267   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3268   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3269   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3270   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3271   for (i=0; i<ncols; i++) {
3272     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3273     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3274     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3275   }
3276   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3277   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3278   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3279 
3280   /* Get iscol_d */
3281   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3282   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3283   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3284 
3285   /* Get isrow_d */
3286   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3287   rstart = mat->rmap->rstart;
3288   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3289   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3290   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3291   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3292 
3293   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3294   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3295   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3296 
3297   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3298   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3299   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3300 
3301   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3302 
3303   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3304   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3305 
3306   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3307   /* off-process column indices */
3308   count = 0;
3309   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3310   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3311 
3312   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3313   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3314   for (i=0; i<Bn; i++) {
3315     if (PetscRealPart(xarray[i]) > -1.0) {
3316       idx[count]     = i;                   /* local column index in off-diagonal part B */
3317       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3318       count++;
3319     }
3320   }
3321   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3322   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3323 
3324   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3325   /* cannot ensure iscol_o has same blocksize as iscol! */
3326 
3327   ierr = PetscFree(idx);CHKERRQ(ierr);
3328   *garray = cmap1;
3329 
3330   ierr = VecDestroy(&x);CHKERRQ(ierr);
3331   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3332   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 /* isrow and iscol have the same processor distribution as mat; the output *submat is a submatrix of the local mat */
3337 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3338 {
3339   PetscErrorCode ierr;
3340   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3341   Mat            M = NULL;
3342   MPI_Comm       comm;
3343   IS             iscol_d,isrow_d,iscol_o;
3344   Mat            Asub = NULL,Bsub = NULL;
3345   PetscInt       n;
3346 
3347   PetscFunctionBegin;
3348   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3349 
3350   if (call == MAT_REUSE_MATRIX) {
3351     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3352     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3353     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3354 
3355     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3356     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3357 
3358     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3359     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3360 
3361     /* Update diagonal and off-diagonal portions of submat */
3362     asub = (Mat_MPIAIJ*)(*submat)->data;
3363     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3364     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3365     if (n) {
3366       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3367     }
3368     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3369     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3370 
3371   } else { /* call == MAT_INITIAL_MATRIX */
3372     const PetscInt *garray;
3373     PetscInt        BsubN;
3374 
3375     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3376     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3377 
3378     /* Create local submatrices Asub and Bsub */
3379     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3380     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3381 
3382     /* Create submatrix M */
3383     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3384 
3385     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3386     asub = (Mat_MPIAIJ*)M->data;
3387 
3388     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3389     n = asub->B->cmap->N;
3390     if (BsubN > n) {
3391       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3392       const PetscInt *idx;
3393       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3394       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3395 
3396       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3397       j = 0;
3398       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3399       for (i=0; i<n; i++) {
3400         if (j >= BsubN) break;
3401         while (subgarray[i] > garray[j]) j++;
3402 
3403         if (subgarray[i] == garray[j]) {
3404           idx_new[i] = idx[j++];
3405         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3406       }
3407       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3408 
3409       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3410       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3411 
3412     } else if (BsubN < n) {
3413       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N);
3414     }
3415 
3416     ierr = PetscFree(garray);CHKERRQ(ierr);
3417     *submat = M;
3418 
3419     /* Save isrow_d, iscol_d and iscol_o used on this process for the next request */
3420     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3421     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3422 
3423     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3424     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3425 
3426     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3427     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3428   }
3429   PetscFunctionReturn(0);
3430 }
3431 
3432 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3433 {
3434   PetscErrorCode ierr;
3435   IS             iscol_local=NULL,isrow_d;
3436   PetscInt       csize;
3437   PetscInt       n,i,j,start,end;
3438   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3439   MPI_Comm       comm;
3440 
3441   PetscFunctionBegin;
3442   /* If isrow has same processor distribution as mat,
3443      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3444   if (call == MAT_REUSE_MATRIX) {
3445     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3446     if (isrow_d) {
3447       sameRowDist  = PETSC_TRUE;
3448       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3449     } else {
3450       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3451       if (iscol_local) {
3452         sameRowDist  = PETSC_TRUE;
3453         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3454       }
3455     }
3456   } else {
3457     /* Check if isrow has same processor distribution as mat */
3458     sameDist[0] = PETSC_FALSE;
3459     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3460     if (!n) {
3461       sameDist[0] = PETSC_TRUE;
3462     } else {
3463       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3464       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3465       if (i >= start && j < end) {
3466         sameDist[0] = PETSC_TRUE;
3467       }
3468     }
3469 
3470     /* Check if iscol has same processor distribution as mat */
3471     sameDist[1] = PETSC_FALSE;
3472     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3473     if (!n) {
3474       sameDist[1] = PETSC_TRUE;
3475     } else {
3476       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3477       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3478       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3479     }
3480 
3481     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3482     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3483     sameRowDist = tsameDist[0];
3484   }
3485 
3486   if (sameRowDist) {
3487     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3488       /* isrow and iscol have same processor distribution as mat */
3489       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3490       PetscFunctionReturn(0);
3491     } else { /* sameRowDist */
3492       /* isrow has same processor distribution as mat */
3493       if (call == MAT_INITIAL_MATRIX) {
3494         PetscBool sorted;
3495         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3496         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3497         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3498         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3499 
3500         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3501         if (sorted) {
3502           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3503           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3504           PetscFunctionReturn(0);
3505         }
3506       } else { /* call == MAT_REUSE_MATRIX */
3507         IS iscol_sub;
3508         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3509         if (iscol_sub) {
3510           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3511           PetscFunctionReturn(0);
3512         }
3513       }
3514     }
3515   }
3516 
3517   /* General case: iscol -> iscol_local which has global size of iscol */
3518   if (call == MAT_REUSE_MATRIX) {
3519     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3520     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3521   } else {
3522     if (!iscol_local) {
3523       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3524     }
3525   }
3526 
3527   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3528   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3529 
3530   if (call == MAT_INITIAL_MATRIX) {
3531     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3532     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3533   }
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 /*@C
3538      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3539          and "off-diagonal" part of the matrix in CSR format.
3540 
3541    Collective
3542 
3543    Input Parameters:
3544 +  comm - MPI communicator
3545 .  A - "diagonal" portion of matrix
3546 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3547 -  garray - global index of B columns
3548 
3549    Output Parameter:
3550 .   mat - the matrix, with input A as its local diagonal matrix
3551    Level: advanced
3552 
3553    Notes:
3554        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3555        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3556 
3557 .seealso: MatCreateMPIAIJWithSplitArrays()
3558 @*/
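/*
   Example usage (a minimal sketch; Asub, Bsub and garray are assumed to come from an
   earlier extraction step, e.g. ISGetSeqIS_SameColDist_Private() followed by
   MatCreateSubMatrix_SeqAIJ(), as in MatCreateSubMatrix_MPIAIJ_SameRowColDist() above):

     Mat M;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   Afterwards Asub has become part of M and Bsub has been destroyed; the caller must
   not use either of them again.
*/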
3559 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3560 {
3561   PetscErrorCode ierr;
3562   Mat_MPIAIJ     *maij;
3563   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3564   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3565   PetscScalar    *oa=b->a;
3566   Mat            Bnew;
3567   PetscInt       m,n,N;
3568 
3569   PetscFunctionBegin;
3570   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3571   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3572   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3573   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3574   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3575   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3576 
3577   /* Get global columns of mat */
3578   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3579 
3580   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3581   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3582   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3583   maij = (Mat_MPIAIJ*)(*mat)->data;
3584 
3585   (*mat)->preallocated = PETSC_TRUE;
3586 
3587   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3588   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3589 
3590   /* Set A as diagonal portion of *mat */
3591   maij->A = A;
3592 
3593   nz = oi[m];
3594   for (i=0; i<nz; i++) {
3595     col   = oj[i];
3596     oj[i] = garray[col];
3597   }
3598 
3599    /* Set Bnew as off-diagonal portion of *mat */
3600   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3601   bnew        = (Mat_SeqAIJ*)Bnew->data;
3602   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3603   maij->B     = Bnew;
3604 
3605   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3606 
3607   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3608   b->free_a       = PETSC_FALSE;
3609   b->free_ij      = PETSC_FALSE;
3610   ierr = MatDestroy(&B);CHKERRQ(ierr);
3611 
3612   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3613   bnew->free_a       = PETSC_TRUE;
3614   bnew->free_ij      = PETSC_TRUE;
3615 
3616   /* condense columns of maij->B */
3617   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3618   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3621   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3622   PetscFunctionReturn(0);
3623 }
3624 
3625 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3626 
3627 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3628 {
3629   PetscErrorCode ierr;
3630   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3631   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3632   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3633   Mat            M,Msub,B=a->B;
3634   MatScalar      *aa;
3635   Mat_SeqAIJ     *aij;
3636   PetscInt       *garray = a->garray,*colsub,Ncols;
3637   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3638   IS             iscol_sub,iscmap;
3639   const PetscInt *is_idx,*cmap;
3640   PetscBool      allcolumns=PETSC_FALSE;
3641   MPI_Comm       comm;
3642 
3643   PetscFunctionBegin;
3644   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3645   if (call == MAT_REUSE_MATRIX) {
3646     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3647     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3648     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3649 
3650     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3651     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3652 
3653     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3654     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3655 
3656     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3657 
3658   } else { /* call == MAT_INITIAL_MATRIX */
3659     PetscBool flg;
3660 
3661     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3662     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3663 
3664     /* (1) iscol -> nonscalable iscol_local */
3665     /* Check for special case: each processor gets entire matrix columns */
3666     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3667     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3668     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3669     if (allcolumns) {
3670       iscol_sub = iscol_local;
3671       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3672       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3673 
3674     } else {
3675       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3676       PetscInt *idx,*cmap1,k;
3677       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3678       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3679       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3680       count = 0;
3681       k     = 0;
3682       for (i=0; i<Ncols; i++) {
3683         j = is_idx[i];
3684         if (j >= cstart && j < cend) {
3685           /* diagonal part of mat */
3686           idx[count]     = j;
3687           cmap1[count++] = i; /* column index in submat */
3688         } else if (Bn) {
3689           /* off-diagonal part of mat */
3690           if (j == garray[k]) {
3691             idx[count]     = j;
3692             cmap1[count++] = i;  /* column index in submat */
3693           } else if (j > garray[k]) {
3694             while (j > garray[k] && k < Bn-1) k++;
3695             if (j == garray[k]) {
3696               idx[count]     = j;
3697               cmap1[count++] = i; /* column index in submat */
3698             }
3699           }
3700         }
3701       }
3702       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3703 
3704       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3705       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3706       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3707 
3708       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3709     }
3710 
3711     /* (3) Create sequential Msub */
3712     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3713   }
3714 
3715   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3716   aij  = (Mat_SeqAIJ*)(Msub)->data;
3717   ii   = aij->i;
3718   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3719 
3720   /*
3721       m - number of local rows
3722       Ncols - number of columns (same on all processors)
3723       rstart - first row in new global matrix generated
3724   */
3725   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3726 
3727   if (call == MAT_INITIAL_MATRIX) {
3728     /* (4) Create parallel newmat */
3729     PetscMPIInt    rank,size;
3730     PetscInt       csize;
3731 
3732     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3733     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3734 
3735     /*
3736         Determine the number of non-zeros in the diagonal and off-diagonal
3737         portions of the matrix in order to do correct preallocation
3738     */
3739 
3740     /* first get start and end of "diagonal" columns */
3741     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3742     if (csize == PETSC_DECIDE) {
3743       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3744       if (mglobal == Ncols) { /* square matrix */
3745         nlocal = m;
3746       } else {
3747         nlocal = Ncols/size + ((Ncols % size) > rank);
3748       }
3749     } else {
3750       nlocal = csize;
3751     }
3752     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3753     rstart = rend - nlocal;
3754     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3755 
3756     /* next, compute all the lengths */
3757     jj    = aij->j;
3758     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3759     olens = dlens + m;
3760     for (i=0; i<m; i++) {
3761       jend = ii[i+1] - ii[i];
3762       olen = 0;
3763       dlen = 0;
3764       for (j=0; j<jend; j++) {
3765         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3766         else dlen++;
3767         jj++;
3768       }
3769       olens[i] = olen;
3770       dlens[i] = dlen;
3771     }
3772 
3773     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3774     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3775 
3776     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3777     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3778     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3779     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3780     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3781     ierr = PetscFree(dlens);CHKERRQ(ierr);
3782 
3783   } else { /* call == MAT_REUSE_MATRIX */
3784     M    = *newmat;
3785     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3786     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3787     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3788     /*
3789          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3790        rather than the slower MatSetValues().
3791     */
3792     M->was_assembled = PETSC_TRUE;
3793     M->assembled     = PETSC_FALSE;
3794   }
3795 
3796   /* (5) Set values of Msub to *newmat */
3797   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3798   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3799 
3800   jj   = aij->j;
3801   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3802   for (i=0; i<m; i++) {
3803     row = rstart + i;
3804     nz  = ii[i+1] - ii[i];
3805     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3806     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3807     jj += nz; aa += nz;
3808   }
3809   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3810   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3811 
3812   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3813   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814 
3815   ierr = PetscFree(colsub);CHKERRQ(ierr);
3816 
3817   /* save Msub, iscol_sub and iscmap used in processor for next request */
3818   if (call == MAT_INITIAL_MATRIX) {
3819     *newmat = M;
3820     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3821     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3822 
3823     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3824     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3825 
3826     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3827     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3828 
3829     if (iscol_local) {
3830       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3831       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3832     }
3833   }
3834   PetscFunctionReturn(0);
3835 }
3836 
3837 /*
3838     Not great since it makes two copies of the submatrix, first a SeqAIJ
3839   matrix locally and then the end result by concatenating the local matrices.
3840   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3841 
3842   Note: This requires a sequential iscol with all indices.
3843 */
3844 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3845 {
3846   PetscErrorCode ierr;
3847   PetscMPIInt    rank,size;
3848   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3849   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3850   Mat            M,Mreuse;
3851   MatScalar      *aa,*vwork;
3852   MPI_Comm       comm;
3853   Mat_SeqAIJ     *aij;
3854   PetscBool      colflag,allcolumns=PETSC_FALSE;
3855 
3856   PetscFunctionBegin;
3857   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3858   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3859   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3860 
3861   /* Check for special case: each processor gets entire matrix columns */
3862   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3863   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3864   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3865   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3866 
3867   if (call ==  MAT_REUSE_MATRIX) {
3868     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3869     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3870     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3871   } else {
3872     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3873   }
3874 
3875   /*
3876       m - number of local rows
3877       n - number of columns (same on all processors)
3878       rstart - first row in new global matrix generated
3879   */
3880   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3881   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3882   if (call == MAT_INITIAL_MATRIX) {
3883     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3884     ii  = aij->i;
3885     jj  = aij->j;
3886 
3887     /*
3888         Determine the number of non-zeros in the diagonal and off-diagonal
3889         portions of the matrix in order to do correct preallocation
3890     */
3891 
3892     /* first get start and end of "diagonal" columns */
3893     if (csize == PETSC_DECIDE) {
3894       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3895       if (mglobal == n) { /* square matrix */
3896         nlocal = m;
3897       } else {
3898         nlocal = n/size + ((n % size) > rank);
3899       }
3900     } else {
3901       nlocal = csize;
3902     }
3903     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3904     rstart = rend - nlocal;
3905     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3906 
3907     /* next, compute all the lengths */
3908     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3909     olens = dlens + m;
3910     for (i=0; i<m; i++) {
3911       jend = ii[i+1] - ii[i];
3912       olen = 0;
3913       dlen = 0;
3914       for (j=0; j<jend; j++) {
3915         if (*jj < rstart || *jj >= rend) olen++;
3916         else dlen++;
3917         jj++;
3918       }
3919       olens[i] = olen;
3920       dlens[i] = dlen;
3921     }
3922     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3923     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3924     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3925     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3926     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3927     ierr = PetscFree(dlens);CHKERRQ(ierr);
3928   } else {
3929     PetscInt ml,nl;
3930 
3931     M    = *newmat;
3932     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3933     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3934     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3935     /*
3936          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3937        rather than the slower MatSetValues().
3938     */
3939     M->was_assembled = PETSC_TRUE;
3940     M->assembled     = PETSC_FALSE;
3941   }
3942   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3943   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3944   ii   = aij->i;
3945   jj   = aij->j;
3946 
3947   /* trigger copy to CPU if needed */
3948   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3949   for (i=0; i<m; i++) {
3950     row   = rstart + i;
3951     nz    = ii[i+1] - ii[i];
3952     cwork = jj; jj += nz;
3953     vwork = aa; aa += nz;
3954     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3955   }
3956   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3957 
3958   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3960   *newmat = M;
3961 
3962   /* save submatrix used in processor for next request */
3963   if (call ==  MAT_INITIAL_MATRIX) {
3964     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3965     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3966   }
3967   PetscFunctionReturn(0);
3968 }
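
/*
   A minimal sketch of how the submatrix routines above are reached through the public
   MatCreateSubMatrix() interface, here extracting the locally owned rows and columns of an
   assembled MPIAIJ matrix A; rstart/rend and cstart/cend are assumed to come from
   MatGetOwnershipRange() and MatGetOwnershipRangeColumn(), and ierr is a declared PetscErrorCode.

     IS  isrow,iscol;
     Mat Asub;

     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/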
3969 
3970 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3971 {
3972   PetscInt       m,cstart, cend,j,nnz,i,d;
3973   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3974   const PetscInt *JJ;
3975   PetscErrorCode ierr;
3976   PetscBool      nooffprocentries;
3977 
3978   PetscFunctionBegin;
3979   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3980 
3981   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3982   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3983   m      = B->rmap->n;
3984   cstart = B->cmap->rstart;
3985   cend   = B->cmap->rend;
3986   rstart = B->rmap->rstart;
3987 
3988   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3989 
3990   if (PetscDefined(USE_DEBUG)) {
3991     for (i=0; i<m; i++) {
3992       nnz = Ii[i+1]- Ii[i];
3993       JJ  = J + Ii[i];
3994       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3995       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3996       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3997     }
3998   }
3999 
4000   for (i=0; i<m; i++) {
4001     nnz     = Ii[i+1]- Ii[i];
4002     JJ      = J + Ii[i];
4003     nnz_max = PetscMax(nnz_max,nnz);
4004     d       = 0;
4005     for (j=0; j<nnz; j++) {
4006       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4007     }
4008     d_nnz[i] = d;
4009     o_nnz[i] = nnz - d;
4010   }
4011   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4012   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4013 
4014   for (i=0; i<m; i++) {
4015     ii   = i + rstart;
4016     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4017   }
4018   nooffprocentries    = B->nooffprocentries;
4019   B->nooffprocentries = PETSC_TRUE;
4020   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4021   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4022   B->nooffprocentries = nooffprocentries;
4023 
4024   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4025   PetscFunctionReturn(0);
4026 }
4027 
4028 /*@
4029    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4030    (the default parallel PETSc format).
4031 
4032    Collective
4033 
4034    Input Parameters:
4035 +  B - the matrix
4036 .  i - the indices into j for the start of each local row (starts with zero)
4037 .  j - the column indices for each local row (starts with zero)
4038 -  v - optional values in the matrix
4039 
4040    Level: developer
4041 
4042    Notes:
4043        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4044      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4045      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4046 
4047        The i and j indices are 0 based, and the i array contains the offsets into the local j (and v) arrays.
4048 
4049        The format used for the sparse matrix input is equivalent to a
4050     row-major ordering, i.e., for the following matrix, the input data expected is
4051     as shown
4052 
4053 $        1 0 0
4054 $        2 0 3     P0
4055 $       -------
4056 $        4 5 6     P1
4057 $
4058 $     Process0 [P0]: rows_owned=[0,1]
4059 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4060 $        j =  {0,0,2}  [size = 3]
4061 $        v =  {1,2,3}  [size = 3]
4062 $
4063 $     Process1 [P1]: rows_owned=[2]
4064 $        i =  {0,3}    [size = nrow+1  = 1+1]
4065 $        j =  {0,1,2}  [size = 3]
4066 $        v =  {4,5,6}  [size = 3]
4067 
4068 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4069           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4070 @*/
4071 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4072 {
4073   PetscErrorCode ierr;
4074 
4075   PetscFunctionBegin;
4076   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
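
/*
   A minimal usage sketch of MatMPIAIJSetPreallocationCSR() on two MPI processes, using the
   arrays from the example in the manual page above; it assumes ierr is a declared
   PetscErrorCode and rank holds the result of MPI_Comm_rank() on PETSC_COMM_WORLD.

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (!rank) {
       const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
       const PetscScalar v[] = {1,2,3};
       ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
     } else {
       const PetscInt    i[] = {0,3}, j[] = {0,1,2};
       const PetscScalar v[] = {4,5,6};
       ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
     }
*/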
4079 
4080 /*@C
4081    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4082    (the default parallel PETSc format).  For good matrix assembly performance
4083    the user should preallocate the matrix storage by setting the parameters
4084    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4085    performance can be increased by more than a factor of 50.
4086 
4087    Collective
4088 
4089    Input Parameters:
4090 +  B - the matrix
4091 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4092            (same value is used for all local rows)
4093 .  d_nnz - array containing the number of nonzeros in the various rows of the
4094            DIAGONAL portion of the local submatrix (possibly different for each row)
4095            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4096            The size of this array is equal to the number of local rows, i.e 'm'.
4097            For matrices that will be factored, you must leave room for (and set)
4098            the diagonal entry even if it is zero.
4099 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4100            submatrix (same value is used for all local rows).
4101 -  o_nnz - array containing the number of nonzeros in the various rows of the
4102            OFF-DIAGONAL portion of the local submatrix (possibly different for
4103            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4104            structure. The size of this array is equal to the number
4105            of local rows, i.e 'm'.
4106 
4107    If the *_nnz parameter is given then the *_nz parameter is ignored
4108 
4109    The AIJ format (also called the Yale sparse matrix format or
4110    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4111    storage.  The stored row and column indices begin with zero.
4112    See Users-Manual: ch_mat for details.
4113 
4114    The parallel matrix is partitioned such that the first m0 rows belong to
4115    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4116    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4117 
4118    The DIAGONAL portion of the local submatrix of a processor can be defined
4119    as the submatrix obtained by extracting the part corresponding to
4120    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4121    first row that belongs to the processor, r2 is the last row belonging to
4122    this processor, and c1-c2 is the range of indices of the local part of a
4123    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4124    common case of a square matrix, the row and column ranges are the same and
4125    the DIAGONAL part is also square. The remaining portion of the local
4126    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4127 
4128    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4129 
4130    You can call MatGetInfo() to get information on how effective the preallocation was;
4131    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4132    You can also run with the option -info and look for messages with the string
4133    malloc in them to see if additional memory allocation was needed.
4134 
4135    Example usage:
4136 
4137    Consider the following 8x8 matrix with 34 non-zero values, that is
4138    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4139    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4140    as follows:
4141 
4142 .vb
4143             1  2  0  |  0  3  0  |  0  4
4144     Proc0   0  5  6  |  7  0  0  |  8  0
4145             9  0 10  | 11  0  0  | 12  0
4146     -------------------------------------
4147            13  0 14  | 15 16 17  |  0  0
4148     Proc1   0 18  0  | 19 20 21  |  0  0
4149             0  0  0  | 22 23  0  | 24  0
4150     -------------------------------------
4151     Proc2  25 26 27  |  0  0 28  | 29  0
4152            30  0  0  | 31 32 33  |  0 34
4153 .ve
4154 
4155    This can be represented as a collection of submatrices as:
4156 
4157 .vb
4158       A B C
4159       D E F
4160       G H I
4161 .ve
4162 
4163    Where the submatrices A,B,C are owned by proc0, D,E,F are
4164    owned by proc1, G,H,I are owned by proc2.
4165 
4166    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4167    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4168    The 'M','N' parameters are 8,8, and have the same values on all procs.
4169 
4170    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4171    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4172    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4173    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4174    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4175    matrix, and [DF] as another SeqAIJ matrix.
4176 
4177    When d_nz, o_nz parameters are specified, d_nz storage elements are
4178    allocated for every row of the local diagonal submatrix, and o_nz
4179    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4180    One way to choose d_nz and o_nz is to use the max nonzeros per local
4181    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4182    In this case, the values of d_nz,o_nz are:
4183 .vb
4184      proc0 : dnz = 2, o_nz = 2
4185      proc1 : dnz = 3, o_nz = 2
4186      proc2 : dnz = 1, o_nz = 4
4187 .ve
4188    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4189    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4190    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4191    34 values.
4192 
4193    When d_nnz, o_nnz parameters are specified, the storage is specified
4194    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4195    In the above case the values for d_nnz,o_nnz are:
4196 .vb
4197      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4198      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4199      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4200 .ve
4201    Here the space allocated is the sum of all the above values, i.e., 34, and
4202    hence pre-allocation is perfect.
4203 
4204    Level: intermediate
4205 
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4207           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4208 @*/
4209 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4210 {
4211   PetscErrorCode ierr;
4212 
4213   PetscFunctionBegin;
4214   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4215   PetscValidType(B,1);
4216   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4217   PetscFunctionReturn(0);
4218 }
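
/*
   A minimal sketch of the d_nnz/o_nnz path of MatMPIAIJSetPreallocation() for the 8x8 example
   in the manual page above (run on 3 MPI processes); it assumes ierr is a declared
   PetscErrorCode and rank holds the result of MPI_Comm_rank() on PETSC_COMM_WORLD.

     Mat            A;
     PetscInt       m      = (rank == 2) ? 2 : 3;
     const PetscInt d0[]   = {2,2,2}, o0[] = {2,2,2};
     const PetscInt d1[]   = {3,3,2}, o1[] = {2,1,1};
     const PetscInt d2[]   = {1,1},   o2[] = {4,4};
     const PetscInt *d_nnz = (rank == 0) ? d0 : ((rank == 1) ? d1 : d2);
     const PetscInt *o_nnz = (rank == 0) ? o0 : ((rank == 1) ? o1 : o2);

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/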
4219 
4220 /*@
4221      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4222          CSR format.
4223 
4224    Collective
4225 
4226    Input Parameters:
4227 +  comm - MPI communicator
4228 .  m - number of local rows (Cannot be PETSC_DECIDE)
4229 .  n - This value should be the same as the local size used in creating the
4230        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4231        calculated if N is given) For square matrices n is almost always m.
4232 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4233 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4234 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4235 .   j - column indices
4236 -   a - matrix values
4237 
4238    Output Parameter:
4239 .   mat - the matrix
4240 
4241    Level: intermediate
4242 
4243    Notes:
4244        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4245      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4246      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4247 
4248        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4249        The i and j indices are 0 based, and the i array contains the offsets into the local j (and a) arrays.
4250        The format which is used for the sparse matrix input, is equivalent to a
4251        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4252 
4253        The format used for the sparse matrix input is equivalent to a
4254     row-major ordering, i.e., for the following matrix, the input data expected is
4255     as shown
4256 $        1 0 0
4257 $        2 0 3     P0
4258 $       -------
4259 $        4 5 6     P1
4260 $
4261 $     Process0 [P0]: rows_owned=[0,1]
4262 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4263 $        j =  {0,0,2}  [size = 3]
4264 $        v =  {1,2,3}  [size = 3]
4265 $
4266 $     Process1 [P1]: rows_owned=[2]
4267 $        i =  {0,3}    [size = nrow+1  = 1+1]
4268 $        j =  {0,1,2}  [size = 3]
4269 $        v =  {4,5,6}  [size = 3]
4270 
4271 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4272           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4273 @*/
4274 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4275 {
4276   PetscErrorCode ierr;
4277 
4278   PetscFunctionBegin;
4279   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4280   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4281   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4282   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4283   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4284   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4285   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4286   PetscFunctionReturn(0);
4287 }
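
/*
   A minimal usage sketch of MatCreateMPIAIJWithArrays() for the two-process example in the
   manual page above; it assumes ierr is a declared PetscErrorCode and rank holds the result of
   MPI_Comm_rank() on PETSC_COMM_WORLD.

     Mat A;
     if (!rank) {
       const PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
       const PetscScalar v[] = {1,2,3};
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     } else {
       const PetscInt    i[] = {0,3}, j[] = {0,1,2};
       const PetscScalar v[] = {4,5,6};
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
     }
*/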
4288 
4289 /*@
4290      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4291          CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created
4292 
4293    Collective
4294 
4295    Input Parameters:
4296 +  mat - the matrix
4297 .  m - number of local rows (Cannot be PETSC_DECIDE)
4298 .  n - This value should be the same as the local size used in creating the
4299        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4300        calculated if N is given) For square matrices n is almost always m.
4301 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4302 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4303 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4304 .  J - column indices
4305 -  v - matrix values
4306 
4307    Level: intermediate
4308 
4309 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4310           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4311 @*/
4312 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4313 {
4314   PetscErrorCode ierr;
4315   PetscInt       cstart,nnz,i,j;
4316   PetscInt       *ld;
4317   PetscBool      nooffprocentries;
4318   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4319   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4320   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4321   const PetscInt *Adi = Ad->i;
4322   PetscInt       ldi,Iii,md;
4323 
4324   PetscFunctionBegin;
4325   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4326   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4327   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4328   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4329 
4330   cstart = mat->cmap->rstart;
4331   if (!Aij->ld) {
4332     /* count number of entries below block diagonal */
4333     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4334     Aij->ld = ld;
4335     for (i=0; i<m; i++) {
4336       nnz  = Ii[i+1]- Ii[i];
4337       j     = 0;
4338       while (j < nnz && J[j] < cstart) {j++;}
4339       J    += nnz;
4340       ld[i] = j;
4341     }
4342   } else {
4343     ld = Aij->ld;
4344   }
4345 
4346   for (i=0; i<m; i++) {
4347     nnz  = Ii[i+1]- Ii[i];
4348     Iii  = Ii[i];
4349     ldi  = ld[i];
4350     md   = Adi[i+1]-Adi[i];
4351     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4352     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4353     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4354     ad  += md;
4355     ao  += nnz - md;
4356   }
4357   nooffprocentries      = mat->nooffprocentries;
4358   mat->nooffprocentries = PETSC_TRUE;
4359   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4360   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4361   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4362   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4363   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4364   mat->nooffprocentries = nooffprocentries;
4365   PetscFunctionReturn(0);
4366 }
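
/*
   A minimal sketch of refreshing only the numerical values of a matrix previously built with
   MatCreateMPIAIJWithArrays(); i, j, m, and n are assumed to be the same arrays and local sizes
   used at creation time, only the values in vnew differ, and ierr is a declared PetscErrorCode.

     ierr = MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,vnew);CHKERRQ(ierr);
*/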
4367 
4368 /*@C
4369    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4370    (the default parallel PETSc format).  For good matrix assembly performance
4371    the user should preallocate the matrix storage by setting the parameters
4372    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4373    performance can be increased by more than a factor of 50.
4374 
4375    Collective
4376 
4377    Input Parameters:
4378 +  comm - MPI communicator
4379 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4380            This value should be the same as the local size used in creating the
4381            y vector for the matrix-vector product y = Ax.
4382 .  n - This value should be the same as the local size used in creating the
4383        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4384        calculated if N is given) For square matrices n is almost always m.
4385 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4386 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4387 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4388            (same value is used for all local rows)
4389 .  d_nnz - array containing the number of nonzeros in the various rows of the
4390            DIAGONAL portion of the local submatrix (possibly different for each row)
4391            or NULL, if d_nz is used to specify the nonzero structure.
4392            The size of this array is equal to the number of local rows, i.e 'm'.
4393 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4394            submatrix (same value is used for all local rows).
4395 -  o_nnz - array containing the number of nonzeros in the various rows of the
4396            OFF-DIAGONAL portion of the local submatrix (possibly different for
4397            each row) or NULL, if o_nz is used to specify the nonzero
4398            structure. The size of this array is equal to the number
4399            of local rows, i.e 'm'.
4400 
4401    Output Parameter:
4402 .  A - the matrix
4403 
4404    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4405    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4406    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4407 
4408    Notes:
4409    If the *_nnz parameter is given then the *_nz parameter is ignored
4410 
4411    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4412    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4413    storage requirements for this matrix.
4414 
4415    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4416    processor then it must be used on all processors that share the object for
4417    that argument.
4418 
4419    The user MUST specify either the local or global matrix dimensions
4420    (possibly both).
4421 
4422    The parallel matrix is partitioned across processors such that the
4423    first m0 rows belong to process 0, the next m1 rows belong to
4424    process 1, the next m2 rows belong to process 2 etc.. where
4425    process 1, the next m2 rows belong to process 2, etc., where
4426    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4427    values corresponding to an [m x N] submatrix.
4428    The columns are logically partitioned with the n0 columns belonging
4429    to 0th partition, the next n1 columns belonging to the next
4430    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4431 
4432    The DIAGONAL portion of the local submatrix on any given processor
4433    is the submatrix corresponding to the rows and columns m,n
4434    corresponding to the given processor, i.e., the diagonal matrix on
4435    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4436    etc. The remaining portion of the local submatrix [m x (N-n)]
4437    constitutes the OFF-DIAGONAL portion. The example below better
4438    illustrates this concept.
4439 
4440    For a square global matrix we define each processor's diagonal portion
4441    to be its local rows and the corresponding columns (a square submatrix);
4442    each processor's off-diagonal portion encompasses the remainder of the
4443    local matrix (a rectangular submatrix).
4444 
4445    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4446 
4447    When calling this routine with a single process communicator, a matrix of
4448    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4449    type of communicator, use the construction mechanism
4450 .vb
4451      MatCreate(...,&A);
4452      MatSetType(A,MATMPIAIJ);
4453      MatSetSizes(A, m,n,M,N);
4454      MatMPIAIJSetPreallocation(A,...);
4455 .ve
4458 
4459    By default, this format uses inodes (identical nodes) when possible.
4460    We search for consecutive rows with the same nonzero structure, thereby
4461    reusing matrix information to achieve increased efficiency.
4462 
4463    Options Database Keys:
4464 +  -mat_no_inode  - Do not use inodes
4465 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4466 
4469    Example usage:
4470 
4471    Consider the following 8x8 matrix with 34 non-zero values, that is
4472    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4473    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4474    as follows
4475 
4476 .vb
4477             1  2  0  |  0  3  0  |  0  4
4478     Proc0   0  5  6  |  7  0  0  |  8  0
4479             9  0 10  | 11  0  0  | 12  0
4480     -------------------------------------
4481            13  0 14  | 15 16 17  |  0  0
4482     Proc1   0 18  0  | 19 20 21  |  0  0
4483             0  0  0  | 22 23  0  | 24  0
4484     -------------------------------------
4485     Proc2  25 26 27  |  0  0 28  | 29  0
4486            30  0  0  | 31 32 33  |  0 34
4487 .ve
4488 
4489    This can be represented as a collection of submatrices as
4490 
4491 .vb
4492       A B C
4493       D E F
4494       G H I
4495 .ve
4496 
4497    Where the submatrices A,B,C are owned by proc0, D,E,F are
4498    owned by proc1, G,H,I are owned by proc2.
4499 
4500    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4501    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4502    The 'M','N' parameters are 8,8, and have the same values on all procs.
4503 
4504    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4505    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4506    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4507    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4508    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4509    matrix, and [DF] as another SeqAIJ matrix.
4510 
4511    When d_nz, o_nz parameters are specified, d_nz storage elements are
4512    allocated for every row of the local diagonal submatrix, and o_nz
4513    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4514    One way to choose d_nz and o_nz is to use the max nonzeros per local
4515    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4516    In this case, the values of d_nz,o_nz are
4517 .vb
4518      proc0 : dnz = 2, o_nz = 2
4519      proc1 : dnz = 3, o_nz = 2
4520      proc2 : dnz = 1, o_nz = 4
4521 .ve
4522    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4523    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4524    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4525    34 values.
4526 
4527    When d_nnz, o_nnz parameters are specified, the storage is specified
4528    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4529    In the above case the values for d_nnz,o_nnz are
4530 .vb
4531      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4532      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4533      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4534 .ve
4535    Here the space allocated is the sum of all the above values, i.e., 34, and
4536    hence pre-allocation is perfect.
4537 
4538    Level: intermediate
4539 
4540 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4541           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4542 @*/
4543 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4544 {
4545   PetscErrorCode ierr;
4546   PetscMPIInt    size;
4547 
4548   PetscFunctionBegin;
4549   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4550   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4551   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4552   if (size > 1) {
4553     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4554     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4555   } else {
4556     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4557     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4558   }
4559   PetscFunctionReturn(0);
4560 }
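
/*
   A minimal sketch of the constant d_nz/o_nz path of MatCreateAIJ() for the 8x8 example in the
   manual page above (run on 3 MPI processes); it assumes ierr is a declared PetscErrorCode and
   rank holds the result of MPI_Comm_rank() on PETSC_COMM_WORLD.

     Mat      A;
     PetscInt m    = (rank == 2) ? 2 : 3;
     PetscInt d_nz = (rank == 0) ? 2 : ((rank == 1) ? 3 : 1);
     PetscInt o_nz = (rank == 2) ? 4 : 2;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);

   followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls.
*/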
4561 
4562 /*@C
4563   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4564 
4565   Not collective
4566 
4567   Input Parameter:
4568 . A - The MPIAIJ matrix
4569 
4570   Output Parameters:
4571 + Ad - The local diagonal block as a SeqAIJ matrix
4572 . Ao - The local off-diagonal block as a SeqAIJ matrix
4573 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4574 
4575   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4576   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4577   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4578   local column numbers to global column numbers in the original matrix.
4579 
4580   Level: intermediate
4581 
4582 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4583 @*/
4584 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4585 {
4586   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4587   PetscBool      flg;
4588   PetscErrorCode ierr;
4589 
4590   PetscFunctionBegin;
4591   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4592   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4593   if (Ad)     *Ad     = a->A;
4594   if (Ao)     *Ao     = a->B;
4595   if (colmap) *colmap = a->garray;
4596   PetscFunctionReturn(0);
4597 }
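
/*
   A minimal usage sketch of MatMPIAIJGetSeqAIJ() on an assembled MPIAIJ matrix A; Ad and Ao are
   references to the internal blocks (they must not be destroyed by the caller), colmap[c] is
   the global column of local column c of Ao as described in the manual page above, and ierr is
   a declared PetscErrorCode.

     Mat            Ad,Ao;
     const PetscInt *colmap;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/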
4598 
4599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4600 {
4601   PetscErrorCode ierr;
4602   PetscInt       m,N,i,rstart,nnz,Ii;
4603   PetscInt       *indx;
4604   PetscScalar    *values;
4605 
4606   PetscFunctionBegin;
4607   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4608   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4609     PetscInt       *dnz,*onz,sum,bs,cbs;
4610 
4611     if (n == PETSC_DECIDE) {
4612       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4613     }
4614     /* Check sum(n) = N */
4615     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4616     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4617 
4618     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4619     rstart -= m;
4620 
4621     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4622     for (i=0; i<m; i++) {
4623       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4624       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4625       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4626     }
4627 
4628     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4629     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4630     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4631     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4632     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4633     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4634     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4635     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4636   }
4637 
4638   /* numeric phase */
4639   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4640   for (i=0; i<m; i++) {
4641     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4642     Ii   = i + rstart;
4643     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4644     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4645   }
4646   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4647   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4648   PetscFunctionReturn(0);
4649 }
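
/*
   A minimal sketch of the public entry point that dispatches to the routine above: each process
   supplies a sequential AIJ matrix whose rows are stacked, in rank order, into one parallel AIJ
   matrix; n is the desired local column size (or PETSC_DECIDE) and ierr is a declared
   PetscErrorCode.

     Mat C;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqmat,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
*/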
4650 
4651 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4652 {
4653   PetscErrorCode    ierr;
4654   PetscMPIInt       rank;
4655   PetscInt          m,N,i,rstart,nnz;
4656   size_t            len;
4657   const PetscInt    *indx;
4658   PetscViewer       out;
4659   char              *name;
4660   Mat               B;
4661   const PetscScalar *values;
4662 
4663   PetscFunctionBegin;
4664   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4665   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4666   /* Should this be the type of the diagonal block of A? */
4667   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4668   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4669   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4670   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4671   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4672   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4673   for (i=0; i<m; i++) {
4674     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4675     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4676     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4677   }
4678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4680 
4681   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4682   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4683   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4684   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4685   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4686   ierr = PetscFree(name);CHKERRQ(ierr);
4687   ierr = MatView(B,out);CHKERRQ(ierr);
4688   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4689   ierr = MatDestroy(&B);CHKERRQ(ierr);
4690   PetscFunctionReturn(0);
4691 }
4692 
4693 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4694 {
4695   PetscErrorCode      ierr;
4696   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4697 
4698   PetscFunctionBegin;
4699   if (!merge) PetscFunctionReturn(0);
4700   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4701   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4702   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4703   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4704   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4705   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4706   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4707   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4708   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4709   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4710   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4711   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4712   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4713   ierr = PetscFree(merge);CHKERRQ(ierr);
4714   PetscFunctionReturn(0);
4715 }
4716 
4717 #include <../src/mat/utils/freespace.h>
4718 #include <petscbt.h>
4719 
4720 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4721 {
4722   PetscErrorCode      ierr;
4723   MPI_Comm            comm;
4724   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4725   PetscMPIInt         size,rank,taga,*len_s;
4726   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4727   PetscInt            proc,m;
4728   PetscInt            **buf_ri,**buf_rj;
4729   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4730   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4731   MPI_Request         *s_waits,*r_waits;
4732   MPI_Status          *status;
4733   MatScalar           *aa=a->a;
4734   MatScalar           **abuf_r,*ba_i;
4735   Mat_Merge_SeqsToMPI *merge;
4736   PetscContainer      container;
4737 
4738   PetscFunctionBegin;
4739   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4740   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4741 
4742   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4743   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4744 
4745   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4746   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4747   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4748 
4749   bi     = merge->bi;
4750   bj     = merge->bj;
4751   buf_ri = merge->buf_ri;
4752   buf_rj = merge->buf_rj;
4753 
4754   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4755   owners = merge->rowmap->range;
4756   len_s  = merge->len_s;
4757 
4758   /* send and recv matrix values */
4759   /*-----------------------------*/
4760   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4761   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4762 
4763   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4764   for (proc=0,k=0; proc<size; proc++) {
4765     if (!len_s[proc]) continue;
4766     i    = owners[proc];
4767     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4768     k++;
4769   }
4770 
4771   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4772   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4773   ierr = PetscFree(status);CHKERRQ(ierr);
4774 
4775   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4776   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4777 
4778   /* insert mat values of mpimat */
4779   /*----------------------------*/
4780   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4781   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4782 
4783   for (k=0; k<merge->nrecv; k++) {
4784     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4785     nrows       = *(buf_ri_k[k]);
4786     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4787     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4788   }
4789 
4790   /* set values of ba */
4791   m = merge->rowmap->n;
4792   for (i=0; i<m; i++) {
4793     arow = owners[rank] + i;
4794     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4795     bnzi = bi[i+1] - bi[i];
4796     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4797 
4798     /* add local non-zero vals of this proc's seqmat into ba */
4799     anzi   = ai[arow+1] - ai[arow];
4800     aj     = a->j + ai[arow];
4801     aa     = a->a + ai[arow];
4802     nextaj = 0;
4803     for (j=0; nextaj<anzi; j++) {
4804       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4805         ba_i[j] += aa[nextaj++];
4806       }
4807     }
4808 
4809     /* add received vals into ba */
4810     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4811       /* i-th row */
4812       if (i == *nextrow[k]) {
4813         anzi   = *(nextai[k]+1) - *nextai[k];
4814         aj     = buf_rj[k] + *(nextai[k]);
4815         aa     = abuf_r[k] + *(nextai[k]);
4816         nextaj = 0;
4817         for (j=0; nextaj<anzi; j++) {
4818           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4819             ba_i[j] += aa[nextaj++];
4820           }
4821         }
4822         nextrow[k]++; nextai[k]++;
4823       }
4824     }
4825     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4826   }
4827   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4828   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4829 
4830   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4831   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4832   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4833   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4834   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4835   PetscFunctionReturn(0);
4836 }
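
/*
   A minimal sketch of using the symbolic/numeric pair (MatCreateMPIAIJSumSeqAIJSymbolic() below
   and MatCreateMPIAIJSumSeqAIJNumeric() above): the structure of the parallel sum is created
   once from each process's sequential contribution seqmat, and the numeric routine can then be
   called again whenever the values of seqmat change; m and n are the desired local sizes or
   PETSC_DECIDE, and ierr is a declared PetscErrorCode.

     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(PETSC_COMM_WORLD,seqmat,m,n,&C);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,C);CHKERRQ(ierr);
*/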
4837 
4838 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4839 {
4840   PetscErrorCode      ierr;
4841   Mat                 B_mpi;
4842   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4843   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4844   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4845   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4846   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4847   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4848   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4849   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4850   MPI_Status          *status;
4851   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4852   PetscBT             lnkbt;
4853   Mat_Merge_SeqsToMPI *merge;
4854   PetscContainer      container;
4855 
4856   PetscFunctionBegin;
4857   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4858 
4859   /* make sure it is a PETSc comm */
4860   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4861   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4862   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4863 
4864   ierr = PetscNew(&merge);CHKERRQ(ierr);
4865   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4866 
4867   /* determine row ownership */
4868   /*---------------------------------------------------------*/
4869   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4870   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4871   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4872   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4873   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4874   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4875   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4876 
4877   m      = merge->rowmap->n;
4878   owners = merge->rowmap->range;
4879 
4880   /* determine the number of messages to send, their lengths */
4881   /*---------------------------------------------------------*/
4882   len_s = merge->len_s;
4883 
4884   len          = 0; /* length of buf_si[] */
4885   merge->nsend = 0;
4886   for (proc=0; proc<size; proc++) {
4887     len_si[proc] = 0;
4888     if (proc == rank) {
4889       len_s[proc] = 0;
4890     } else {
4891       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4892       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4893     }
4894     if (len_s[proc]) {
4895       merge->nsend++;
4896       nrows = 0;
4897       for (i=owners[proc]; i<owners[proc+1]; i++) {
4898         if (ai[i+1] > ai[i]) nrows++;
4899       }
4900       len_si[proc] = 2*(nrows+1);
4901       len         += len_si[proc];
4902     }
4903   }
4904 
4905   /* determine the number and length of messages to receive for ij-structure */
4906   /*-------------------------------------------------------------------------*/
4907   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4908   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4909 
4910   /* post the Irecv of j-structure */
4911   /*-------------------------------*/
4912   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4913   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4914 
4915   /* post the Isend of j-structure */
4916   /*--------------------------------*/
4917   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4918 
4919   for (proc=0, k=0; proc<size; proc++) {
4920     if (!len_s[proc]) continue;
4921     i    = owners[proc];
4922     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4923     k++;
4924   }
4925 
4926   /* receives and sends of j-structure are complete */
4927   /*------------------------------------------------*/
4928   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4929   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4930 
4931   /* send and recv i-structure */
4932   /*---------------------------*/
4933   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4934   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4935 
4936   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4937   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4938   for (proc=0,k=0; proc<size; proc++) {
4939     if (!len_s[proc]) continue;
4940     /* form outgoing message for i-structure:
4941          buf_si[0]:                 nrows to be sent
4942                [1:nrows]:           row index (global)
4943                [nrows+1:2*nrows+1]: i-structure index
4944     */
4945     /*-------------------------------------------*/
4946     nrows       = len_si[proc]/2 - 1;
4947     buf_si_i    = buf_si + nrows+1;
4948     buf_si[0]   = nrows;
4949     buf_si_i[0] = 0;
4950     nrows       = 0;
4951     for (i=owners[proc]; i<owners[proc+1]; i++) {
4952       anzi = ai[i+1] - ai[i];
4953       if (anzi) {
4954         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4955         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4956         nrows++;
4957       }
4958     }
4959     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4960     k++;
4961     buf_si += len_si[proc];
4962   }
4963 
4964   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4965   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4966 
4967   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4968   for (i=0; i<merge->nrecv; i++) {
4969     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4970   }
4971 
4972   ierr = PetscFree(len_si);CHKERRQ(ierr);
4973   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4974   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4975   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4976   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4977   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4978   ierr = PetscFree(status);CHKERRQ(ierr);
4979 
4980   /* compute a local seq matrix in each processor */
4981   /*----------------------------------------------*/
4982   /* allocate bi array and free space for accumulating nonzero column info */
4983   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4984   bi[0] = 0;
4985 
4986   /* create and initialize a linked list */
4987   nlnk = N+1;
4988   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4989 
4990   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4991   len  = ai[owners[rank+1]] - ai[owners[rank]];
4992   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4993 
4994   current_space = free_space;
4995 
4996   /* determine symbolic info for each local row */
4997   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4998 
4999   for (k=0; k<merge->nrecv; k++) {
5000     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5001     nrows       = *buf_ri_k[k];
5002     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5003     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5004   }
5005 
5006   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5007   len  = 0;
5008   for (i=0; i<m; i++) {
5009     bnzi = 0;
5010     /* add local non-zero cols of this proc's seqmat into lnk */
5011     arow  = owners[rank] + i;
5012     anzi  = ai[arow+1] - ai[arow];
5013     aj    = a->j + ai[arow];
5014     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5015     bnzi += nlnk;
5016     /* add received col data into lnk */
5017     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5018       if (i == *nextrow[k]) { /* i-th row */
5019         anzi  = *(nextai[k]+1) - *nextai[k];
5020         aj    = buf_rj[k] + *nextai[k];
5021         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5022         bnzi += nlnk;
5023         nextrow[k]++; nextai[k]++;
5024       }
5025     }
5026     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5027 
5028     /* if free space is not available, make more free space */
5029     if (current_space->local_remaining<bnzi) {
5030       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5031       nspacedouble++;
5032     }
5033     /* copy data into free space, then initialize lnk */
5034     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5035     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5036 
5037     current_space->array           += bnzi;
5038     current_space->local_used      += bnzi;
5039     current_space->local_remaining -= bnzi;
5040 
5041     bi[i+1] = bi[i] + bnzi;
5042   }
5043 
5044   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5045 
5046   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5047   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5048   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5049 
5050   /* create symbolic parallel matrix B_mpi */
5051   /*---------------------------------------*/
5052   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5053   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5054   if (n==PETSC_DECIDE) {
5055     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5056   } else {
5057     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5058   }
5059   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5060   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5061   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5062   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5063   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5064 
5065   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5066   B_mpi->assembled  = PETSC_FALSE;
5067   merge->bi         = bi;
5068   merge->bj         = bj;
5069   merge->buf_ri     = buf_ri;
5070   merge->buf_rj     = buf_rj;
5071   merge->coi        = NULL;
5072   merge->coj        = NULL;
5073   merge->owners_co  = NULL;
5074 
5075   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5076 
5077   /* attach the supporting struct to B_mpi for reuse */
5078   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5079   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5080   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5081   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5082   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5083   *mpimat = B_mpi;
5084 
5085   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5086   PetscFunctionReturn(0);
5087 }
5088 
5089 /*@C
5090       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5091                  matrices from each processor
5092 
5093     Collective
5094 
5095    Input Parameters:
5096 +    comm - the communicator the parallel matrix will live on
5097 .    seqmat - the input sequential matrix on each process
5098 .    m - number of local rows (or PETSC_DECIDE)
5099 .    n - number of local columns (or PETSC_DECIDE)
5100 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5101 
5102    Output Parameter:
5103 .    mpimat - the parallel matrix generated
5104 
5105     Level: advanced
5106 
5107    Notes:
5108      The dimensions of the sequential matrix in each processor MUST be the same.
5109      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
5110      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5111 @*/
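/*
   Example usage, a minimal sketch: it assumes every rank contributes a 10x10 SeqAIJ matrix (the size and the single
   entry are illustrative only) and that ierr is a PetscErrorCode declared by the caller.

     Mat         seqmat,mpimat;
     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,10,10,1,NULL,&seqmat);CHKERRQ(ierr);
     ierr = MatSetValues(seqmat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the values of seqmat, keeping its nonzero pattern, then sum again in place ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/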
5112 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5113 {
5114   PetscErrorCode ierr;
5115   PetscMPIInt    size;
5116 
5117   PetscFunctionBegin;
5118   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5119   if (size == 1) {
5120     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5121     if (scall == MAT_INITIAL_MATRIX) {
5122       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5123     } else {
5124       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5125     }
5126     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5127     PetscFunctionReturn(0);
5128   }
5129   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5130   if (scall == MAT_INITIAL_MATRIX) {
5131     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5132   }
5133   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5134   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5135   PetscFunctionReturn(0);
5136 }
5137 
5138 /*@
5139      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5140           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5141           with MatGetSize()
5142 
5143     Not Collective
5144 
5145    Input Parameters:
5146 +    A - the matrix
5147 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5148 
5149    Output Parameter:
5150 .    A_loc - the local sequential matrix generated
5151 
5152     Level: developer
5153 
5154    Notes:
5155      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5156      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5157      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5158      modify the values of the returned A_loc.
5159 
5160 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5161 @*/
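/*
   Example usage, a minimal sketch: it assumes A is an assembled MATMPIAIJ matrix and that ierr is a PetscErrorCode
   declared by the caller.

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the mlocal-by-N sequential matrix A_loc ...
     ... after the values of A change (same nonzero pattern), refresh the copy in place ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/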
5162 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5163 {
5164   PetscErrorCode ierr;
5165   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5166   Mat_SeqAIJ     *mat,*a,*b;
5167   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5168   MatScalar      *aa,*ba,*cam;
5169   PetscScalar    *ca;
5170   PetscMPIInt    size;
5171   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5172   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5173   PetscBool      match;
5174 
5175   PetscFunctionBegin;
5176   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5177   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5178   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5179   if (size == 1) {
5180     if (scall == MAT_INITIAL_MATRIX) {
5181       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5182       *A_loc = mpimat->A;
5183     } else if (scall == MAT_REUSE_MATRIX) {
5184       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5185     }
5186     PetscFunctionReturn(0);
5187   }
5188 
5189   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5190   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5191   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5192   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5193   aa = a->a; ba = b->a;
5194   if (scall == MAT_INITIAL_MATRIX) {
5195     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5196     ci[0] = 0;
5197     for (i=0; i<am; i++) {
5198       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5199     }
5200     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5201     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5202     k    = 0;
5203     for (i=0; i<am; i++) {
5204       ncols_o = bi[i+1] - bi[i];
5205       ncols_d = ai[i+1] - ai[i];
5206       /* off-diagonal portion of A */
5207       for (jo=0; jo<ncols_o; jo++) {
5208         col = cmap[*bj];
5209         if (col >= cstart) break;
5210         cj[k]   = col; bj++;
5211         ca[k++] = *ba++;
5212       }
5213       /* diagonal portion of A */
5214       for (j=0; j<ncols_d; j++) {
5215         cj[k]   = cstart + *aj++;
5216         ca[k++] = *aa++;
5217       }
5218       /* off-diagonal portion of A */
5219       for (j=jo; j<ncols_o; j++) {
5220         cj[k]   = cmap[*bj++];
5221         ca[k++] = *ba++;
5222       }
5223     }
5224     /* put together the new matrix */
5225     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5226     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5227     /* Since these are PETSc arrays, change flags to free them as necessary. */
5228     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5229     mat->free_a  = PETSC_TRUE;
5230     mat->free_ij = PETSC_TRUE;
5231     mat->nonew   = 0;
5232   } else if (scall == MAT_REUSE_MATRIX) {
5233     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5234     ci = mat->i; cj = mat->j; cam = mat->a;
5235     for (i=0; i<am; i++) {
5236       /* off-diagonal portion of A */
5237       ncols_o = bi[i+1] - bi[i];
5238       for (jo=0; jo<ncols_o; jo++) {
5239         col = cmap[*bj];
5240         if (col >= cstart) break;
5241         *cam++ = *ba++; bj++;
5242       }
5243       /* diagonal portion of A */
5244       ncols_d = ai[i+1] - ai[i];
5245       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5246       /* off-diagonal portion of A */
5247       for (j=jo; j<ncols_o; j++) {
5248         *cam++ = *ba++; bj++;
5249       }
5250     }
5251   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5252   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5253   PetscFunctionReturn(0);
5254 }
5255 
5256 /*@
5257      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5258           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5259 
5260     Not Collective
5261 
5262    Input Parameters:
5263 +    A - the matrix
5264 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5265 
5266    Output Parameters:
5267 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5268 -    A_loc - the local sequential matrix generated
5269 
5270     Level: developer
5271 
5272    Notes:
5273      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5274 
5275 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5276 
5277 @*/
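/*
   Example usage, a minimal sketch: it assumes A is an assembled MATMPIAIJ matrix and that ierr is a PetscErrorCode
   declared by the caller.

     Mat            A_loc;
     IS             glob;
     const PetscInt *gidx;

     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
     ierr = ISGetIndices(glob,&gidx);CHKERRQ(ierr);
     ... gidx[j] is the global column of A corresponding to local column j of A_loc ...
     ierr = ISRestoreIndices(glob,&gidx);CHKERRQ(ierr);
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/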
5278 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5279 {
5280   PetscErrorCode ierr;
5281   Mat            Ao,Ad;
5282   const PetscInt *cmap;
5283   PetscMPIInt    size;
5284   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5285 
5286   PetscFunctionBegin;
5287   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5288   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5289   if (size == 1) {
5290     if (scall == MAT_INITIAL_MATRIX) {
5291       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5292       *A_loc = Ad;
5293     } else if (scall == MAT_REUSE_MATRIX) {
5294       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5295     }
5296     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5297     PetscFunctionReturn(0);
5298   }
5299   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5300   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5301   if (f) {
5302     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5303   } else {
5304     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5305     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5306     Mat_SeqAIJ        *c;
5307     PetscInt          *ai = a->i, *aj = a->j;
5308     PetscInt          *bi = b->i, *bj = b->j;
5309     PetscInt          *ci,*cj;
5310     const PetscScalar *aa,*ba;
5311     PetscScalar       *ca;
5312     PetscInt          i,j,am,dn,on;
5313 
5314     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5315     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5316     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5317     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5318     if (scall == MAT_INITIAL_MATRIX) {
5319       PetscInt k;
5320       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5321       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5322       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5323       ci[0] = 0;
5324       for (i=0,k=0; i<am; i++) {
5325         const PetscInt ncols_o = bi[i+1] - bi[i];
5326         const PetscInt ncols_d = ai[i+1] - ai[i];
5327         ci[i+1] = ci[i] + ncols_o + ncols_d;
5328         /* diagonal portion of A */
5329         for (j=0; j<ncols_d; j++,k++) {
5330           cj[k] = *aj++;
5331           ca[k] = *aa++;
5332         }
5333         /* off-diagonal portion of A */
5334         for (j=0; j<ncols_o; j++,k++) {
5335           cj[k] = dn + *bj++;
5336           ca[k] = *ba++;
5337         }
5338       }
5339       /* put together the new matrix */
5340       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5341       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5342       /* Since these are PETSc arrays, change flags to free them as necessary. */
5343       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5344       c->free_a  = PETSC_TRUE;
5345       c->free_ij = PETSC_TRUE;
5346       c->nonew   = 0;
5347       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5348     } else if (scall == MAT_REUSE_MATRIX) {
5349 #if defined(PETSC_HAVE_DEVICE)
5350       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5351 #endif
5352       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5353       ca = c->a;
5354       for (i=0; i<am; i++) {
5355         const PetscInt ncols_d = ai[i+1] - ai[i];
5356         const PetscInt ncols_o = bi[i+1] - bi[i];
5357         /* diagonal portion of A */
5358         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5359         /* off-diagonal portion of A */
5360         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5361       }
5362     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5363     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5364     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5365     if (glob) {
5366       PetscInt cst, *gidx;
5367 
5368       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5369       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5370       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5371       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5372       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5373     }
5374   }
5375   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5376   PetscFunctionReturn(0);
5377 }
5378 
5379 /*@C
5380      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5381 
5382     Not Collective
5383 
5384    Input Parameters:
5385 +    A - the matrix
5386 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5387 -    row, col - index sets of rows and columns to extract (or NULL)
5388 
5389    Output Parameter:
5390 .    A_loc - the local sequential matrix generated
5391 
5392     Level: developer
5393 
5394 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5395 
5396 @*/
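/*
   Example usage, a minimal sketch: it assumes A is an assembled MATMPIAIJ matrix and that ierr is a PetscErrorCode
   declared by the caller; passing NULL for row and col selects all local rows and all locally nonzero columns.

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/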
5397 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5398 {
5399   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5400   PetscErrorCode ierr;
5401   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5402   IS             isrowa,iscola;
5403   Mat            *aloc;
5404   PetscBool      match;
5405 
5406   PetscFunctionBegin;
5407   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5408   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5409   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5410   if (!row) {
5411     start = A->rmap->rstart; end = A->rmap->rend;
5412     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5413   } else {
5414     isrowa = *row;
5415   }
5416   if (!col) {
5417     start = A->cmap->rstart;
5418     cmap  = a->garray;
5419     nzA   = a->A->cmap->n;
5420     nzB   = a->B->cmap->n;
5421     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5422     ncols = 0;
5423     for (i=0; i<nzB; i++) {
5424       if (cmap[i] < start) idx[ncols++] = cmap[i];
5425       else break;
5426     }
5427     imark = i;
5428     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5429     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5430     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5431   } else {
5432     iscola = *col;
5433   }
5434   if (scall != MAT_INITIAL_MATRIX) {
5435     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5436     aloc[0] = *A_loc;
5437   }
5438   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5439   if (!col) { /* attach global id of condensed columns */
5440     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5441   }
5442   *A_loc = aloc[0];
5443   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5444   if (!row) {
5445     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5446   }
5447   if (!col) {
5448     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5449   }
5450   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5451   PetscFunctionReturn(0);
5452 }
5453 
5454 /*
5455  * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
5456  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5457  * on a global size.
5458  * */
5459 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5460 {
5461   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5462   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5463   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5464   PetscMPIInt              owner;
5465   PetscSFNode              *iremote,*oiremote;
5466   const PetscInt           *lrowindices;
5467   PetscErrorCode           ierr;
5468   PetscSF                  sf,osf;
5469   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5470   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5471   MPI_Comm                 comm;
5472   ISLocalToGlobalMapping   mapping;
5473 
5474   PetscFunctionBegin;
5475   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5476   /* plocalsize is the number of roots
5477    * nrows is the number of leaves
5478    * */
5479   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5480   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5481   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5482   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5483   for (i=0;i<nrows;i++) {
5484     /* Find a remote index and an owner for a row
5485      * The row could be local or remote
5486      * */
5487     owner = 0;
5488     lidx  = 0;
5489     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5490     iremote[i].index = lidx;
5491     iremote[i].rank  = owner;
5492   }
5493   /* Create SF to communicate how many nonzero columns for each row */
5494   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5495   /* The SF will figure out the number of nonzero columns for each row, and their
5496    * offsets
5497    * */
5498   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5499   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5500   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5501 
5502   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5503   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5504   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5505   roffsets[0] = 0;
5506   roffsets[1] = 0;
5507   for (i=0;i<plocalsize;i++) {
5508     /* diag */
5509     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5510     /* off diag */
5511     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5512     /* compute offsets so that we know the relative location of each row's data */
5513     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5514     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5515   }
5516   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5517   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5518   /* 'r' means root, and 'l' means leaf */
5519   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5520   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5521   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5522   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5523   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5524   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5525   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5526   dntotalcols = 0;
5527   ontotalcols = 0;
5528   ncol = 0;
5529   for (i=0;i<nrows;i++) {
5530     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5531     ncol = PetscMax(pnnz[i],ncol);
5532     /* diag */
5533     dntotalcols += nlcols[i*2+0];
5534     /* off diag */
5535     ontotalcols += nlcols[i*2+1];
5536   }
5537   /* We do not need to figure out the right number of columns
5538    * since all the calculations will be done by going through the raw data
5539    * */
5540   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5541   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5542   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5543   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5544   /* diag */
5545   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5546   /* off diag */
5547   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5548   /* diag */
5549   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5550   /* off diag */
5551   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5552   dntotalcols = 0;
5553   ontotalcols = 0;
5554   ntotalcols  = 0;
5555   for (i=0;i<nrows;i++) {
5556     owner = 0;
5557     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5558     /* Set iremote for diag matrix */
5559     for (j=0;j<nlcols[i*2+0];j++) {
5560       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5561       iremote[dntotalcols].rank    = owner;
5562       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5563       ilocal[dntotalcols++]        = ntotalcols++;
5564     }
5565     /* off diag */
5566     for (j=0;j<nlcols[i*2+1];j++) {
5567       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5568       oiremote[ontotalcols].rank    = owner;
5569       oilocal[ontotalcols++]        = ntotalcols++;
5570     }
5571   }
5572   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5573   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5574   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5575   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5576   /* P serves as roots and P_oth is leaves
5577    * Diag matrix
5578    * */
5579   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5581   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5582 
5583   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5584   /* Off diag */
5585   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5586   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5587   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5588   /* We operate on the matrix internal data to save memory */
5589   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5590   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5591   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5592   /* Convert to global indices for diag matrix */
5593   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5594   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5595   /* We want P_oth to store global indices */
5596   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5597   /* Use memory scalable approach */
5598   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5599   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5600   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5601   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5602   /* Convert back to local indices */
5603   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5604   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5605   nout = 0;
5606   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5607   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5608   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5609   /* Exchange values */
5610   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5611   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5612   /* Stop PETSc from shrinking memory */
5613   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5614   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5615   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5616   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5617   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5618   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5619   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5620   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5621   PetscFunctionReturn(0);
5622 }
5623 
5624 /*
5625  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5626  * This supports MPIAIJ and MAIJ
5627  * */
5628 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5629 {
5630   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5631   Mat_SeqAIJ            *p_oth;
5632   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5633   IS                    rows,map;
5634   PetscHMapI            hamp;
5635   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5636   MPI_Comm              comm;
5637   PetscSF               sf,osf;
5638   PetscBool             has;
5639   PetscErrorCode        ierr;
5640 
5641   PetscFunctionBegin;
5642   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5643   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5644   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5645    *  and then create a submatrix (that often is an overlapping matrix)
5646    * */
5647   if (reuse == MAT_INITIAL_MATRIX) {
5648     /* Use a hash table to figure out unique keys */
5649     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5650     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5651     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5652     count = 0;
5653     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5654     for (i=0;i<a->B->cmap->n;i++) {
5655       key  = a->garray[i]/dof;
5656       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5657       if (!has) {
5658         mapping[i] = count;
5659         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5660       } else {
5661         /* Current 'i' has the same value as the previous step */
5662         mapping[i] = count-1;
5663       }
5664     }
5665     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5666     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5667     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5668     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5669     off = 0;
5670     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5671     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5672     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5673     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5674     /* In case the matrix was already created and the user wants to recreate it */
5675     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5676     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5677     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5678     ierr = ISDestroy(&map);CHKERRQ(ierr);
5679     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5680   } else if (reuse == MAT_REUSE_MATRIX) {
5681     /* If matrix was already created, we simply update values using SF objects
5682      * that were attached to the matrix earlier.
5683      *  */
5684     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5685     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5686     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5687     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5688     /* Update values in place */
5689     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5690     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5691     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5692     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5693   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5694   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5695   PetscFunctionReturn(0);
5696 }
5697 
5698 /*@C
5699     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5700 
5701     Collective on Mat
5702 
5703    Input Parameters:
5704 +    A,B - the matrices in mpiaij format
5705 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5706 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5707 
5708    Output Parameters:
5709 +    rowb, colb - index sets of rows and columns of B to extract
5710 -    B_seq - the sequential matrix generated
5711 
5712     Level: developer
5713 
5714 @*/
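/*
   Example usage, a minimal sketch: it assumes A and B are assembled MATMPIAIJ matrices whose local column (A) and
   row (B) layouts match, and that ierr is a PetscErrorCode declared by the caller.

     Mat B_seq;
     IS  rowb,colb;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... after the values of B change (same nonzero pattern), reuse the index sets and the matrix ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/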
5715 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5716 {
5717   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5718   PetscErrorCode ierr;
5719   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5720   IS             isrowb,iscolb;
5721   Mat            *bseq=NULL;
5722 
5723   PetscFunctionBegin;
5724   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5725     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5726   }
5727   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5728 
5729   if (scall == MAT_INITIAL_MATRIX) {
5730     start = A->cmap->rstart;
5731     cmap  = a->garray;
5732     nzA   = a->A->cmap->n;
5733     nzB   = a->B->cmap->n;
5734     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5735     ncols = 0;
5736     for (i=0; i<nzB; i++) {  /* row < local row index */
5737       if (cmap[i] < start) idx[ncols++] = cmap[i];
5738       else break;
5739     }
5740     imark = i;
5741     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5742     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5743     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5744     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5745   } else {
5746     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5747     isrowb  = *rowb; iscolb = *colb;
5748     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5749     bseq[0] = *B_seq;
5750   }
5751   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5752   *B_seq = bseq[0];
5753   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5754   if (!rowb) {
5755     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5756   } else {
5757     *rowb = isrowb;
5758   }
5759   if (!colb) {
5760     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5761   } else {
5762     *colb = iscolb;
5763   }
5764   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5765   PetscFunctionReturn(0);
5766 }
5767 
5768 /*
5769     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5770     of the OFF-DIAGONAL portion of local A
5771 
5772     Collective on Mat
5773 
5774    Input Parameters:
5775 +    A,B - the matrices in mpiaij format
5776 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5777 
5778    Output Parameters:
5779 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5780 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5781 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5782 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5783 
5784     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5785      for this matrix. This is not desirable.
5786 
5787     Level: developer
5788 
5789 */
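/*
   Example usage, a minimal sketch: it assumes A and B are assembled MATMPIAIJ matrices whose local column (A) and
   row (B) layouts match, and that ierr is a PetscErrorCode declared by the caller. The saved arrays allow a cheap
   update when only the values of B change.

     Mat       B_oth;
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... after the values of B change (same nonzero pattern), update B_oth in place ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/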
5790 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5791 {
5792   PetscErrorCode         ierr;
5793   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5794   Mat_SeqAIJ             *b_oth;
5795   VecScatter             ctx;
5796   MPI_Comm               comm;
5797   const PetscMPIInt      *rprocs,*sprocs;
5798   const PetscInt         *srow,*rstarts,*sstarts;
5799   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5800   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5801   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5802   MPI_Request            *rwaits = NULL,*swaits = NULL;
5803   MPI_Status             rstatus;
5804   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5805   PETSC_UNUSED PetscMPIInt jj;
5806 
5807   PetscFunctionBegin;
5808   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5809   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5810 
5811   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5812     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5813   }
5814   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5815   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5816 
5817   if (size == 1) {
5818     startsj_s = NULL;
5819     bufa_ptr  = NULL;
5820     *B_oth    = NULL;
5821     PetscFunctionReturn(0);
5822   }
5823 
5824   ctx = a->Mvctx;
5825   tag = ((PetscObject)ctx)->tag;
5826 
5827   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5828   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5829   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5830   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5831   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5832   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5833 
5834   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5835   if (scall == MAT_INITIAL_MATRIX) {
5836     /* i-array */
5837     /*---------*/
5838     /*  post receives */
5839     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5840     for (i=0; i<nrecvs; i++) {
5841       rowlen = rvalues + rstarts[i]*rbs;
5842       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5843       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5844     }
5845 
5846     /* pack the outgoing message */
5847     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5848 
5849     sstartsj[0] = 0;
5850     rstartsj[0] = 0;
5851     len         = 0; /* total length of j or a array to be sent */
5852     if (nsends) {
5853       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5854       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5855     }
5856     for (i=0; i<nsends; i++) {
5857       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5858       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5859       for (j=0; j<nrows; j++) {
5860         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5861         for (l=0; l<sbs; l++) {
5862           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5863 
5864           rowlen[j*sbs+l] = ncols;
5865 
5866           len += ncols;
5867           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5868         }
5869         k++;
5870       }
5871       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5872 
5873       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5874     }
5875     /* recvs and sends of i-array are completed */
5876     i = nrecvs;
5877     while (i--) {
5878       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5879     }
5880     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5881     ierr = PetscFree(svalues);CHKERRQ(ierr);
5882 
5883     /* allocate buffers for sending j and a arrays */
5884     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5885     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5886 
5887     /* create i-array of B_oth */
5888     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5889 
5890     b_othi[0] = 0;
5891     len       = 0; /* total length of j or a array to be received */
5892     k         = 0;
5893     for (i=0; i<nrecvs; i++) {
5894       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5895       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5896       for (j=0; j<nrows; j++) {
5897         b_othi[k+1] = b_othi[k] + rowlen[j];
5898         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5899         k++;
5900       }
5901       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5902     }
5903     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5904 
5905     /* allocate space for j and a arrays of B_oth */
5906     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5907     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5908 
5909     /* j-array */
5910     /*---------*/
5911     /*  post receives of j-array */
5912     for (i=0; i<nrecvs; i++) {
5913       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5914       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5915     }
5916 
5917     /* pack the outgoing message j-array */
5918     if (nsends) k = sstarts[0];
5919     for (i=0; i<nsends; i++) {
5920       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5921       bufJ  = bufj+sstartsj[i];
5922       for (j=0; j<nrows; j++) {
5923         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5924         for (ll=0; ll<sbs; ll++) {
5925           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5926           for (l=0; l<ncols; l++) {
5927             *bufJ++ = cols[l];
5928           }
5929           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5930         }
5931       }
5932       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5933     }
5934 
5935     /* recvs and sends of j-array are completed */
5936     i = nrecvs;
5937     while (i--) {
5938       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5939     }
5940     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5941   } else if (scall == MAT_REUSE_MATRIX) {
5942     sstartsj = *startsj_s;
5943     rstartsj = *startsj_r;
5944     bufa     = *bufa_ptr;
5945     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5946     b_otha   = b_oth->a;
5947 #if defined(PETSC_HAVE_DEVICE)
5948     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5949 #endif
5950   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5951 
5952   /* a-array */
5953   /*---------*/
5954   /*  post receives of a-array */
5955   for (i=0; i<nrecvs; i++) {
5956     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5957     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5958   }
5959 
5960   /* pack the outgoing message a-array */
5961   if (nsends) k = sstarts[0];
5962   for (i=0; i<nsends; i++) {
5963     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5964     bufA  = bufa+sstartsj[i];
5965     for (j=0; j<nrows; j++) {
5966       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5967       for (ll=0; ll<sbs; ll++) {
5968         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5969         for (l=0; l<ncols; l++) {
5970           *bufA++ = vals[l];
5971         }
5972         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5973       }
5974     }
5975     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5976   }
5977   /* recvs and sends of a-array are completed */
5978   i = nrecvs;
5979   while (i--) {
5980     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5981   }
5982   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5983   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5984 
5985   if (scall == MAT_INITIAL_MATRIX) {
5986     /* put together the new matrix */
5987     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5988 
5989     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5990     /* Since these are PETSc arrays, change flags to free them as necessary. */
5991     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5992     b_oth->free_a  = PETSC_TRUE;
5993     b_oth->free_ij = PETSC_TRUE;
5994     b_oth->nonew   = 0;
5995 
5996     ierr = PetscFree(bufj);CHKERRQ(ierr);
5997     if (!startsj_s || !bufa_ptr) {
5998       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5999       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
6000     } else {
6001       *startsj_s = sstartsj;
6002       *startsj_r = rstartsj;
6003       *bufa_ptr  = bufa;
6004     }
6005   }
6006 
6007   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
6008   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
6009   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
6010   PetscFunctionReturn(0);
6011 }
6012 
6013 /*@C
6014   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
6015 
6016   Not Collective
6017 
6018   Input Parameter:
6019 . A - The matrix in mpiaij format
6020 
6021   Output Parameters:
6022 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
6023 . colmap - A map from global column index to local index into lvec
6024 - multScatter - A scatter from the argument of a matrix-vector product to lvec
6025 
6026   Level: developer
6027 
6028 @*/
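/*
   Example usage, a minimal sketch: it assumes A is an assembled MATMPIAIJ matrix and that ierr is a PetscErrorCode
   declared by the caller. The returned objects are A's internal data structures and must not be destroyed by the
   caller.

     Vec        lvec;
     VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
*/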
6029 #if defined(PETSC_USE_CTABLE)
6030 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
6031 #else
6032 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
6033 #endif
6034 {
6035   Mat_MPIAIJ *a;
6036 
6037   PetscFunctionBegin;
6038   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
6039   PetscValidPointer(lvec, 2);
6040   PetscValidPointer(colmap, 3);
6041   PetscValidPointer(multScatter, 4);
6042   a = (Mat_MPIAIJ*) A->data;
6043   if (lvec) *lvec = a->lvec;
6044   if (colmap) *colmap = a->colmap;
6045   if (multScatter) *multScatter = a->Mvctx;
6046   PetscFunctionReturn(0);
6047 }
6048 
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
6052 #if defined(PETSC_HAVE_MKL_SPARSE)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
6054 #endif
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
6057 #if defined(PETSC_HAVE_ELEMENTAL)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
6059 #endif
6060 #if defined(PETSC_HAVE_SCALAPACK)
6061 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
6062 #endif
6063 #if defined(PETSC_HAVE_HYPRE)
6064 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
6065 #endif
6066 #if defined(PETSC_HAVE_CUDA)
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6068 #endif
6069 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6071 #endif
6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6073 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6075 
6076 /*
6077     Computes (B'*A')' since computing B*A directly is untenable
6078 
6079                n                       p                          p
6080         [             ]       [             ]         [                 ]
6081       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6082         [             ]       [             ]         [                 ]
6083 
6084 */
6085 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6086 {
6087   PetscErrorCode ierr;
6088   Mat            At,Bt,Ct;
6089 
6090   PetscFunctionBegin;
6091   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
6092   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
6093   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
6094   ierr = MatDestroy(&At);CHKERRQ(ierr);
6095   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
6096   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
6097   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6098   PetscFunctionReturn(0);
6099 }
6100 
6101 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6102 {
6103   PetscErrorCode ierr;
6104   PetscBool      cisdense;
6105 
6106   PetscFunctionBegin;
6107   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
6108   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6109   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6110   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6111   if (!cisdense) {
6112     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6113   }
6114   ierr = MatSetUp(C);CHKERRQ(ierr);
6115 
6116   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6117   PetscFunctionReturn(0);
6118 }
6119 
6120 /* ----------------------------------------------------------------*/
6121 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6122 {
6123   Mat_Product *product = C->product;
6124   Mat         A = product->A,B=product->B;
6125 
6126   PetscFunctionBegin;
6127   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6128     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6129 
6130   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6131   C->ops->productsymbolic = MatProductSymbolic_AB;
6132   PetscFunctionReturn(0);
6133 }
6134 
6135 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6136 {
6137   PetscErrorCode ierr;
6138   Mat_Product    *product = C->product;
6139 
6140   PetscFunctionBegin;
6141   if (product->type == MATPRODUCT_AB) {
6142     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6143   }
6144   PetscFunctionReturn(0);
6145 }
6146 /* ----------------------------------------------------------------*/
6147 
6148 /*MC
6149    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6150 
6151    Options Database Keys:
6152 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6153 
6154    Level: beginner
6155 
6156    Notes:
6157     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6158     in this case the values associated with the rows and columns one passes in are set to zero
6159     in the matrix.
6160 
6161     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6162     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6163 
6164 .seealso: MatCreateAIJ()
6165 M*/
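/*
   Example for the first Note above, a minimal sketch: it assumes A is a preallocated MATMPIAIJ matrix with at least
   two columns and that ierr is a PetscErrorCode declared by the caller. Passing NULL for the values inserts explicit
   zeros at the given locations.

     PetscInt row = 0,cols[2] = {0,1};

     ierr = MatSetValues(A,1,&row,2,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/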
6166 
6167 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6168 {
6169   Mat_MPIAIJ     *b;
6170   PetscErrorCode ierr;
6171   PetscMPIInt    size;
6172 
6173   PetscFunctionBegin;
6174   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6175 
6176   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6177   B->data       = (void*)b;
6178   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6179   B->assembled  = PETSC_FALSE;
6180   B->insertmode = NOT_SET_VALUES;
6181   b->size       = size;
6182 
6183   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6184 
6185   /* build cache for off array entries formed */
6186   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6187 
6188   b->donotstash  = PETSC_FALSE;
6189   b->colmap      = NULL;
6190   b->garray      = NULL;
6191   b->roworiented = PETSC_TRUE;
6192 
6193   /* stuff used for matrix vector multiply */
6194   b->lvec  = NULL;
6195   b->Mvctx = NULL;
6196 
6197   /* stuff for MatGetRow() */
6198   b->rowindices   = NULL;
6199   b->rowvalues    = NULL;
6200   b->getrowactive = PETSC_FALSE;
6201 
6202   /* flexible pointer used in CUSPARSE classes */
6203   b->spptr = NULL;
6204 
6205   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6206   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6207   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6208   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6209   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6210   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6211   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6212   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6213   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6214   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6215 #if defined(PETSC_HAVE_CUDA)
6216   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6217 #endif
6218 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6219   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6220 #endif
6221 #if defined(PETSC_HAVE_MKL_SPARSE)
6222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6223 #endif
6224   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6225   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6226   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6227 #if defined(PETSC_HAVE_ELEMENTAL)
6228   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6229 #endif
6230 #if defined(PETSC_HAVE_SCALAPACK)
6231   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6232 #endif
6233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6234   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6235 #if defined(PETSC_HAVE_HYPRE)
6236   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6238 #endif
6239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6241   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6242   PetscFunctionReturn(0);
6243 }
6244 
6245 /*@C
6246      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6247          and "off-diagonal" part of the matrix in CSR format.
6248 
6249    Collective
6250 
6251    Input Parameters:
6252 +  comm - MPI communicator
6253 .  m - number of local rows (Cannot be PETSC_DECIDE)
6254 .  n - This value should be the same as the local size used in creating the
6255        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6256        calculated if N is given). For square matrices n is almost always m.
6257 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6258 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6259 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6260 .   j - column indices
6261 .   a - matrix values
6262 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6263 .   oj - column indices
6264 -   oa - matrix values
6265 
6266    Output Parameter:
6267 .   mat - the matrix
6268 
6269    Level: advanced
6270 
6271    Notes:
6272        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6273        must free the arrays once the matrix has been destroyed and not before.
6274 
6275        The i, j, oi, and oj indices are 0 based
6276 
6277        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6278 
6279        This sets local rows and cannot be used to set off-processor values.
6280 
6281        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6282        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6283        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6284        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6285        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6286        communication if it is known that only local entries will be set.
6287 
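   Example:
     A minimal sketch for rank 0 of a two-process run (the sizes and values below are illustrative only, not
     taken from a PETSc example); the global matrix is 4x4 with m = n = 2 on each process, and rank 0 owns
     rows and columns 0 and 1:
.vb
     /* rank 0 rows:  row 0 = [1 2 | 0 3],  row 1 = [0 4 | 5 0] */
     PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1};  /* "diagonal" block, column indices local to the block */
     PetscScalar a[]  = {1,2,4};
     PetscInt    oi[] = {0,1,2}, oj[] = {3,2};    /* "off-diagonal" block, global column indices          */
     PetscScalar oa[] = {3,5};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
     Rank 1 passes the corresponding arrays for its own rows; as noted above, all six arrays must remain valid
     until the matrix is destroyed.
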
6288 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6289           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6290 @*/
6291 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6292 {
6293   PetscErrorCode ierr;
6294   Mat_MPIAIJ     *maij;
6295 
6296   PetscFunctionBegin;
6297   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6298   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6299   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6300   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6301   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6302   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6303   maij = (Mat_MPIAIJ*) (*mat)->data;
6304 
6305   (*mat)->preallocated = PETSC_TRUE;
6306 
6307   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6308   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6309 
6310   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6311   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6312 
6313   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6314   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6315   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6316   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6317 
6318   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6319   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6320   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6321   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6322   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6323   PetscFunctionReturn(0);
6324 }
6325 
6326 /*
6327     Special version for direct calls from Fortran
6328 */
6329 #include <petsc/private/fortranimpl.h>
6330 
6331 /* Change these macros so they can be used in a void function */
6332 #undef CHKERRQ
6333 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6334 #undef SETERRQ2
6335 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6336 #undef SETERRQ3
6337 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6338 #undef SETERRQ
6339 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6340 
6341 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6342 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6343 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6344 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6345 #else
6346 #endif
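/*
   matsetvaluesmpiaij_ - Fortran-callable version of MatSetValues() specialized for MATMPIAIJ; it inserts directly
   into the "diagonal" (A) and "off-diagonal" (B) blocks through the MatSetValues_SeqAIJ_{A,B}_Private() macros and
   stashes entries destined for rows owned by other processes.
*/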
6347 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6348 {
6349   Mat            mat  = *mmat;
6350   PetscInt       m    = *mm, n = *mn;
6351   InsertMode     addv = *maddv;
6352   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6353   PetscScalar    value;
6354   PetscErrorCode ierr;
6355 
6356   MatCheckPreallocated(mat,1);
6357   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6358   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6359   {
6360     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6361     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6362     PetscBool roworiented = aij->roworiented;
6363 
6364     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros below */
6365     Mat        A                    = aij->A;
6366     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6367     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6368     MatScalar  *aa                  = a->a;
6369     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6370     Mat        B                    = aij->B;
6371     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6372     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6373     MatScalar  *ba                  = b->a;
6374     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6375      * cannot use "#if defined" inside a macro. */
6376     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6377 
6378     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6379     PetscInt  nonew = a->nonew;
6380     MatScalar *ap1,*ap2;
6381 
6382     PetscFunctionBegin;
6383     for (i=0; i<m; i++) {
6384       if (im[i] < 0) continue;
6385       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6386       if (im[i] >= rstart && im[i] < rend) {
6387         row      = im[i] - rstart;
6388         lastcol1 = -1;
6389         rp1      = aj + ai[row];
6390         ap1      = aa + ai[row];
6391         rmax1    = aimax[row];
6392         nrow1    = ailen[row];
6393         low1     = 0;
6394         high1    = nrow1;
6395         lastcol2 = -1;
6396         rp2      = bj + bi[row];
6397         ap2      = ba + bi[row];
6398         rmax2    = bimax[row];
6399         nrow2    = bilen[row];
6400         low2     = 0;
6401         high2    = nrow2;
6402 
6403         for (j=0; j<n; j++) {
6404           if (roworiented) value = v[i*n+j];
6405           else value = v[i+j*m];
6406           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6407           if (in[j] >= cstart && in[j] < cend) {
6408             col = in[j] - cstart;
6409             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6410 #if defined(PETSC_HAVE_DEVICE)
6411             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6412 #endif
6413           } else if (in[j] < 0) continue;
6414           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6415             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6416             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6417           } else {
6418             if (mat->was_assembled) {
6419               if (!aij->colmap) {
6420                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6421               }
6422 #if defined(PETSC_USE_CTABLE)
6423               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6424               col--;
6425 #else
6426               col = aij->colmap[in[j]] - 1;
6427 #endif
6428               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6429                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6430                 col  =  in[j];
6431                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6432                 B        = aij->B;
6433                 b        = (Mat_SeqAIJ*)B->data;
6434                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6435                 rp2      = bj + bi[row];
6436                 ap2      = ba + bi[row];
6437                 rmax2    = bimax[row];
6438                 nrow2    = bilen[row];
6439                 low2     = 0;
6440                 high2    = nrow2;
6441                 bm       = aij->B->rmap->n;
6442                 ba       = b->a;
6443                 inserted = PETSC_FALSE;
6444               }
6445             } else col = in[j];
6446             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6447 #if defined(PETSC_HAVE_DEVICE)
6448             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6449 #endif
6450           }
6451         }
6452       } else if (!aij->donotstash) {
6453         if (roworiented) {
6454           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6455         } else {
6456           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6457         }
6458       }
6459     }
6460   }
6461   PetscFunctionReturnVoid();
6462 }
6463 
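/*
   Data for the "backend" MatProduct implementation for MPIAIJ (see MatProductSymbolic_MPIAIJBACKEND() below):
   the requested product is decomposed into a few sequential intermediate products mp[], whose values are then
   assembled into the result with MatSetValuesCOO() during the numeric phase.
*/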
6464 typedef struct {
6465   Mat       *mp;    /* intermediate products */
6466   PetscBool *mptmp; /* is the intermediate product temporary? */
6467   PetscInt  cp;     /* number of intermediate products */
6468 
6469   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6470   PetscInt    *startsj_s,*startsj_r;
6471   PetscScalar *bufa;
6472   Mat         P_oth;
6473 
6474   /* may take advantage of merging product->B */
6475   Mat Bloc;
6476 
6477   /* cusparse does not support splitting the symbolic and numeric phases;
6478      when api_user is true, we do not need to update the numerical values
6479      of the temporary storage */
6480   PetscBool reusesym;
6481 
6482   /* support for COO values insertion */
6483   PetscScalar  *coo_v,*coo_w;  /* coo_v: COO values passed to MatSetValuesCOO(); coo_w: send buffer for off-process values */
6484   PetscInt     **own;          /* own[i]: for products with off-process rows, indices into product i's values whose global row is owned locally */
6485   PetscInt     **off;          /* off[i]: for products with off-process rows, indices into product i's values whose global row belongs to another process */
6486   PetscBool    hasoffproc;     /* if true, values are also inserted into off-process rows (e.g. AtB or PtAP) */
6487   PetscSF      sf;             /* used for off-process values insertion and memory allocation */
6488   PetscMemType mtype;
6489 
6490   /* customization */
6491   PetscBool abmerge;
6492   PetscBool P_oth_bind;
6493 } MatMatMPIAIJBACKEND;
6494 
6495 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6496 {
6497   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6498   PetscInt            i;
6499   PetscErrorCode      ierr;
6500 
6501   PetscFunctionBegin;
6502   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6503   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6504   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6505   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6506   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6507   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6508   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6509   for (i = 0; i < mmdata->cp; i++) {
6510     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6511   }
6512   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6513   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6514   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6515   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6516   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6517   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6518   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6519   PetscFunctionReturn(0);
6520 }
6521 
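/* Copy selected entries of the value array of a SeqAIJ matrix, v[k] = A->a[idx[k]] for k = 0,...,n-1
   (or the first n values when idx is NULL), using a type-specific implementation when one is composed */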
6522 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6523 {
6524   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6525   PetscErrorCode ierr;
6526 
6527   PetscFunctionBegin;
6528   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6529   if (f) {
6530     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6531   } else {
6532     const PetscScalar *vv;
6533 
6534     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6535     if (n && idx) {
6536       PetscScalar    *w = v;
6537       const PetscInt *oi = idx;
6538       PetscInt       j;
6539 
6540       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6541     } else {
6542       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6543     }
6544     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6545   }
6546   PetscFunctionReturn(0);
6547 }
6548 
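/* Numeric phase: recompute the intermediate products and copy their values into the COO value array;
   off-process contributions are packed into coo_w, gathered onto the owning processes with the PetscSF,
   and the result is assembled with MatSetValuesCOO() */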
6549 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6550 {
6551   MatMatMPIAIJBACKEND *mmdata;
6552   PetscInt            i,n_d,n_o;
6553   PetscErrorCode      ierr;
6554 
6555   PetscFunctionBegin;
6556   MatCheckProduct(C,1);
6557   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6558   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6559   if (!mmdata->reusesym) { /* update temporary matrices */
6560     if (mmdata->P_oth) {
6561       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6562     }
6563     if (mmdata->Bloc) {
6564       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6565     }
6566   }
6567   mmdata->reusesym = PETSC_FALSE;
6568 
6569   for (i = 0; i < mmdata->cp; i++) {
6570     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6571     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6572   }
6573   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6574     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6575 
6576     if (mmdata->mptmp[i]) continue;
6577     if (noff) {
6578       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6579 
6580       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6581       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6582       n_o += noff;
6583       n_d += nown;
6584     } else {
6585       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6586 
6587       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6588       n_d += mm->nz;
6589     }
6590   }
6591   if (mmdata->hasoffproc) { /* offprocess insertion */
6592     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6593     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6594   }
6595   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6596   PetscFunctionReturn(0);
6597 }
6598 
6599 /* Support for Pt * A, A * P, or Pt * A * P */
6600 #define MAX_NUMBER_INTERMEDIATE 4
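/*
   Symbolic phase: split the requested product into at most MAX_NUMBER_INTERMEDIATE sequential products built from
   the diagonal/off-diagonal blocks, the merged local matrices, and the gathered P_oth rows; record how the local
   row/column indices of each intermediate product map to global indices of C (rmapt[]/cmapt[] below); and
   preallocate C in COO format.
*/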
6601 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6602 {
6603   Mat_Product            *product = C->product;
6604   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6605   Mat_MPIAIJ             *a,*p;
6606   MatMatMPIAIJBACKEND    *mmdata;
6607   ISLocalToGlobalMapping P_oth_l2g = NULL;
6608   IS                     glob = NULL;
6609   const char             *prefix;
6610   char                   pprefix[256];
6611   const PetscInt         *globidx,*P_oth_idx;
6612   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6613   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6614   MatProductType         ptype;
6615   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6616   PetscMPIInt            size;
6617   PetscErrorCode         ierr;
6618 
6619   PetscFunctionBegin;
6620   MatCheckProduct(C,1);
6621   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6622   ptype = product->type;
6623   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6624   switch (ptype) {
6625   case MATPRODUCT_AB:
6626     A = product->A;
6627     P = product->B;
6628     m = A->rmap->n;
6629     n = P->cmap->n;
6630     M = A->rmap->N;
6631     N = P->cmap->N;
6632     break;
6633   case MATPRODUCT_AtB:
6634     P = product->A;
6635     A = product->B;
6636     m = P->cmap->n;
6637     n = A->cmap->n;
6638     M = P->cmap->N;
6639     N = A->cmap->N;
6640     hasoffproc = PETSC_TRUE;
6641     break;
6642   case MATPRODUCT_PtAP:
6643     A = product->A;
6644     P = product->B;
6645     m = P->cmap->n;
6646     n = P->cmap->n;
6647     M = P->cmap->N;
6648     N = P->cmap->N;
6649     hasoffproc = PETSC_TRUE;
6650     break;
6651   default:
6652     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6653   }
6654   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRQ(ierr);
6655   if (size == 1) hasoffproc = PETSC_FALSE;
6656 
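  /* rmapt[]/cmapt[] encode how the local row/column indices of each intermediate product map to global indices
     of C: 1 -> add the owning range start (C->rmap->rstart or C->cmap->rstart), 2 -> indirect through
     rmapa[]/cmapa[], 0 (columns only) -> already global, -1 -> unused slot */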
6657   /* defaults */
6658   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6659     mp[i]    = NULL;
6660     mptmp[i] = PETSC_FALSE;
6661     rmapt[i] = -1;
6662     cmapt[i] = -1;
6663     rmapa[i] = NULL;
6664     cmapa[i] = NULL;
6665   }
6666 
6667   /* customization */
6668   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6669   mmdata->reusesym = product->api_user;
6670   if (ptype == MATPRODUCT_AB) {
6671     if (product->api_user) {
6672       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6673       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6674       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6675       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6676     } else {
6677       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6678       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6679       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6680       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6681     }
6682   } else if (ptype == MATPRODUCT_PtAP) {
6683     if (product->api_user) {
6684       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6685       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6686       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6687     } else {
6688       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6689       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6690       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6691     }
6692   }
6693   a = (Mat_MPIAIJ*)A->data;
6694   p = (Mat_MPIAIJ*)P->data;
6695   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6696   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6697   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6698   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6699   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6700   switch (ptype) {
6701   case MATPRODUCT_AB: /* A * P */
6702     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6703 
6704     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6705       /* P is product->B */
6706       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6707       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6708       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6709       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6710       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6711       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6712       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6713       mp[cp]->product->api_user = product->api_user;
6714       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6715       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6716       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6717       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6718       rmapt[cp] = 1;
6719       cmapt[cp] = 2;
6720       cmapa[cp] = globidx;
6721       mptmp[cp] = PETSC_FALSE;
6722       cp++;
6723     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6724       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6725       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6726       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6727       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6728       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6729       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6730       mp[cp]->product->api_user = product->api_user;
6731       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6732       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6733       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6734       rmapt[cp] = 1;
6735       cmapt[cp] = 1;
6736       mptmp[cp] = PETSC_FALSE;
6737       cp++;
6738       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6739       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6740       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6741       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6742       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6743       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6744       mp[cp]->product->api_user = product->api_user;
6745       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6746       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6747       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6748       rmapt[cp] = 1;
6749       cmapt[cp] = 2;
6750       cmapa[cp] = p->garray;
6751       mptmp[cp] = PETSC_FALSE;
6752       cp++;
6753     }
6754     if (mmdata->P_oth) {
6755       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6756       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6757       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6758       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6759       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6760       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6761       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6762       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6763       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6764       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6765       mp[cp]->product->api_user = product->api_user;
6766       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6767       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6768       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6769       rmapt[cp] = 1;
6770       cmapt[cp] = 2;
6771       cmapa[cp] = P_oth_idx;
6772       mptmp[cp] = PETSC_FALSE;
6773       cp++;
6774     }
6775     break;
6776   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6777     /* A is product->B */
6778     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6779     if (A == P) {
6780       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6781       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6782       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6783       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6784       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6785       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6786       mp[cp]->product->api_user = product->api_user;
6787       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6788       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6789       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6790       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6791       rmapt[cp] = 2;
6792       rmapa[cp] = globidx;
6793       cmapt[cp] = 2;
6794       cmapa[cp] = globidx;
6795       mptmp[cp] = PETSC_FALSE;
6796       cp++;
6797     } else {
6798       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6799       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6800       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6801       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6802       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6803       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6804       mp[cp]->product->api_user = product->api_user;
6805       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6806       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6807       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6808       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6809       rmapt[cp] = 1;
6810       cmapt[cp] = 2;
6811       cmapa[cp] = globidx;
6812       mptmp[cp] = PETSC_FALSE;
6813       cp++;
6814       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6815       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6816       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6817       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6818       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6819       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6820       mp[cp]->product->api_user = product->api_user;
6821       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6822       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6823       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6824       rmapt[cp] = 2;
6825       rmapa[cp] = p->garray;
6826       cmapt[cp] = 2;
6827       cmapa[cp] = globidx;
6828       mptmp[cp] = PETSC_FALSE;
6829       cp++;
6830     }
6831     break;
6832   case MATPRODUCT_PtAP:
6833     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6834     /* P is product->B */
6835     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6836     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6837     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6838     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6839     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6840     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6841     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6842     mp[cp]->product->api_user = product->api_user;
6843     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6844     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6845     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6846     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6847     rmapt[cp] = 2;
6848     rmapa[cp] = globidx;
6849     cmapt[cp] = 2;
6850     cmapa[cp] = globidx;
6851     mptmp[cp] = PETSC_FALSE;
6852     cp++;
6853     if (mmdata->P_oth) {
6854       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6855       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6856       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6857       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6858       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6859       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6860       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6861       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6862       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6863       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6864       mp[cp]->product->api_user = product->api_user;
6865       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6866       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6867       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6868       mptmp[cp] = PETSC_TRUE;
6869       cp++;
6870       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6871       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6872       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6873       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6874       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6875       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6876       mp[cp]->product->api_user = product->api_user;
6877       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6878       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6879       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6880       rmapt[cp] = 2;
6881       rmapa[cp] = globidx;
6882       cmapt[cp] = 2;
6883       cmapa[cp] = P_oth_idx;
6884       mptmp[cp] = PETSC_FALSE;
6885       cp++;
6886     }
6887     break;
6888   default:
6889     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6890   }
6891   /* sanity check */
6892   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6893 
6894   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6895   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6896   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6897   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6898   mmdata->cp = cp;
6899   C->product->data       = mmdata;
6900   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6901   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6902 
6903   /* memory type */
6904   mmdata->mtype = PETSC_MEMTYPE_HOST;
6905   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6906   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6907   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6908   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6909   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6910 
6911   /* prepare coo coordinates for values insertion */
6912   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6913     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6914     if (mptmp[cp]) continue;
6915     if (rmapt[cp] == 2 && hasoffproc) {
6916       const PetscInt *rmap = rmapa[cp];
6917       const PetscInt mr = mp[cp]->rmap->n;
6918       const PetscInt rs = C->rmap->rstart;
6919       const PetscInt re = C->rmap->rend;
6920       const PetscInt *ii  = mm->i;
6921       for (i = 0; i < mr; i++) {
6922         const PetscInt gr = rmap[i];
6923         const PetscInt nz = ii[i+1] - ii[i];
6924         if (gr < rs || gr >= re) ncoo_o += nz;
6925         else ncoo_oown += nz;
6926       }
6927     } else ncoo_d += mm->nz;
6928   }
6929   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6930   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6931   if (hasoffproc) { /* handle offproc values insertion: build the SF over the off-process global rows so their values can be gathered to the owning processes at numeric time */
6932     PetscSF  msf;
6933     PetscInt ncoo2,*coo_i2,*coo_j2;
6934 
6935     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6936     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6937     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6938     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6939       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6940       PetscInt   *idxoff = mmdata->off[cp];
6941       PetscInt   *idxown = mmdata->own[cp];
6942       if (!mptmp[cp] && rmapt[cp] == 2) {
6943         const PetscInt *rmap = rmapa[cp];
6944         const PetscInt *cmap = cmapa[cp];
6945         const PetscInt *ii  = mm->i;
6946         PetscInt       *coi = coo_i + ncoo_o;
6947         PetscInt       *coj = coo_j + ncoo_o;
6948         const PetscInt mr = mp[cp]->rmap->n;
6949         const PetscInt rs = C->rmap->rstart;
6950         const PetscInt re = C->rmap->rend;
6951         const PetscInt cs = C->cmap->rstart;
6952         for (i = 0; i < mr; i++) {
6953           const PetscInt *jj = mm->j + ii[i];
6954           const PetscInt gr  = rmap[i];
6955           const PetscInt nz  = ii[i+1] - ii[i];
6956           if (gr < rs || gr >= re) {
6957             for (j = ii[i]; j < ii[i+1]; j++) {
6958               *coi++ = gr;
6959               *idxoff++ = j;
6960             }
6961             if (!cmapt[cp]) { /* already global */
6962               for (j = 0; j < nz; j++) *coj++ = jj[j];
6963             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6964               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6965             } else { /* offdiag */
6966               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6967             }
6968             ncoo_o += nz;
6969           } else {
6970             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6971           }
6972         }
6973       }
6974       mmdata->off[cp + 1] = idxoff;
6975       mmdata->own[cp + 1] = idxown;
6976     }
6977 
6978     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6979     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6980     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6981     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6982     ncoo = ncoo_d + ncoo_oown + ncoo2;
6983     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6984     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6985     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6986     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6987     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6988     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6989     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6990     coo_i = coo_i2;
6991     coo_j = coo_j2;
6992   } else { /* no offproc values insertion */
6993     ncoo = ncoo_d;
6994     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6995 
6996     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6997     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6998     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6999   }
7000   mmdata->hasoffproc = hasoffproc;
7001 
7002   /* on-process indices */
7003   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7004     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7005     PetscInt       *coi = coo_i + ncoo_d;
7006     PetscInt       *coj = coo_j + ncoo_d;
7007     const PetscInt *jj  = mm->j;
7008     const PetscInt *ii  = mm->i;
7009     const PetscInt *cmap = cmapa[cp];
7010     const PetscInt *rmap = rmapa[cp];
7011     const PetscInt mr = mp[cp]->rmap->n;
7012     const PetscInt rs = C->rmap->rstart;
7013     const PetscInt re = C->rmap->rend;
7014     const PetscInt cs = C->cmap->rstart;
7015 
7016     if (mptmp[cp]) continue;
7017     if (rmapt[cp] == 1) {
7018       for (i = 0; i < mr; i++) {
7019         const PetscInt gr = i + rs;
7020         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7021       }
7022       /* columns coo */
7023       if (!cmapt[cp]) {
7024         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
7025       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7026         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
7027       } else { /* offdiag */
7028         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7029       }
7030       ncoo_d += mm->nz;
7031     } else if (rmapt[cp] == 2) {
7032       for (i = 0; i < mr; i++) {
7033         const PetscInt *jj = mm->j + ii[i];
7034         const PetscInt gr  = rmap[i];
7035         const PetscInt nz  = ii[i+1] - ii[i];
7036         if (gr >= rs && gr < re) {
7037           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7038           if (!cmapt[cp]) { /* already global */
7039             for (j = 0; j < nz; j++) *coj++ = jj[j];
7040           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7041             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7042           } else { /* offdiag */
7043             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7044           }
7045           ncoo_d += nz;
7046         }
7047       }
7048     }
7049   }
7050   if (glob) {
7051     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
7052   }
7053   ierr = ISDestroy(&glob);CHKERRQ(ierr);
7054   if (P_oth_l2g) {
7055     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7056   }
7057   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
7058   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
7059 
7060   /* preallocate with COO data */
7061   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
7062   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
7063   PetscFunctionReturn(0);
7064 }
7065 
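/* Select the backend (COO-based) product kernels for AB, AtB and PtAP. When PETSc is configured with device
   support, this is done only if A and B have the same type, neither is bound to the CPU, and the user has not
   requested the CPU code path through the -mat*_backend_cpu options; otherwise fall back to the MPIAIJ routines */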
7066 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7067 {
7068   Mat_Product    *product = mat->product;
7069   PetscErrorCode ierr;
7070 #if defined(PETSC_HAVE_DEVICE)
7071   PetscBool      match = PETSC_FALSE;
7072   PetscBool      usecpu = PETSC_FALSE;
7073 #else
7074   PetscBool      match = PETSC_TRUE;
7075 #endif
7076 
7077   PetscFunctionBegin;
7078   MatCheckProduct(mat,1);
7079 #if defined(PETSC_HAVE_DEVICE)
7080   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7081     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7082   }
7083   if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */
7084     switch (product->type) {
7085     case MATPRODUCT_AB:
7086       if (product->api_user) {
7087         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7088         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7089         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7090       } else {
7091         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7092         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7093         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7094       }
7095       break;
7096     case MATPRODUCT_AtB:
7097       if (product->api_user) {
7098         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7099         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7100         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7101       } else {
7102         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7103         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7104         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7105       }
7106       break;
7107     case MATPRODUCT_PtAP:
7108       if (product->api_user) {
7109         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7110         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7111         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7112       } else {
7113         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7114         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7115         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7116       }
7117       break;
7118     default:
7119       break;
7120     }
7121     match = (PetscBool)!usecpu;
7122   }
7123 #endif
7124   if (match) {
7125     switch (product->type) {
7126     case MATPRODUCT_AB:
7127     case MATPRODUCT_AtB:
7128     case MATPRODUCT_PtAP:
7129       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7130       break;
7131     default:
7132       break;
7133     }
7134   }
7135   /* fallback to MPIAIJ ops */
7136   if (!mat->ops->productsymbolic) {
7137     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7138   }
7139   PetscFunctionReturn(0);
7140 }
7141