xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 45fdc0f8344bb1cf8ef7bee3d8bcb18fac84c2e4)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  /* Merge the diagonal (A) and off-diagonal (B) blocks into a single sequential matrix
     so that the SeqAIJ row IJ routine can be reused */
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  /* Stash the merged matrix on A so the matching MatRestoreRowIJ_MPIAIJ() can retrieve
     and destroy it; the composed reference keeps B alive between the two calls */
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}
19 
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  /* Recover the merged local matrix created by MatGetRowIJ_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  /* Drop the reference obtained here; the reference held by the composition on A is
     released when A is destroyed or the composed slot is overwritten */
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
45    enough exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity.
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
78   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
79 
80   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
81    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
82    * to differ from the parent matrix. */
83   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
84   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
85 
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
90 {
91   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
92 
93   PetscFunctionBegin;
94   if (mat->A) {
95     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
96     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
97   }
98   PetscFunctionReturn(0);
99 }
100 
/*
   MatFindNonzeroRows_MPIAIJ - creates an IS (in the matrix's communicator) listing, in
   global numbering, the locally owned rows that contain at least one stored nonzero value.

   Output: *keptrows is NULL when no process has a zero row (so all rows are "kept"
   implicitly); otherwise it lists the nonzero rows owned by this process.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;  /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;  /* off-diagonal block */
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* First pass: cnt = number of local rows whose stored values are all zero
     (either no stored entries, or every stored entry equals 0.0) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;       /* structurally empty row */
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;   /* row has a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;   /* row has a nonzero in the off-diagonal block */
    }
    cnt++;            /* all stored entries were exactly zero */
ok1:;
  }
  /* n0rows = global count of zero rows; if there are none, return NULL (collective decision) */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect the (local-count - zero-count) nonzero rows in global numbering */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
169 
170 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
171 {
172   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
173   PetscBool         cong;
174 
175   PetscFunctionBegin;
176   PetscCall(MatHasCongruentLayouts(Y,&cong));
177   if (Y->assembled && cong) {
178     PetscCall(MatDiagonalSet(aij->A,D,is));
179   } else {
180     PetscCall(MatDiagonalSet_Default(Y,D,is));
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
188   PetscInt       i,rstart,nrows,*rows;
189 
190   PetscFunctionBegin;
191   *zrows = NULL;
192   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
193   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
194   for (i=0; i<nrows; i++) rows[i] += rstart;
195   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
196   PetscFunctionReturn(0);
197 }
198 
/*
   MatGetColumnReductions_MPIAIJ - computes a per-column reduction (norm, sum, or mean)
   over all rows of the parallel matrix.

   Input:  type - NORM_1, NORM_2, NORM_INFINITY, or one of the REDUCTION_* values
   Output: reductions - array of global length n (the global number of columns),
           identical on every process after the final Allreduce
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;  /* garray maps off-diagonal local columns to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));  /* zero-initialized accumulator over all global columns */
  /* NOTE(review): these Get/Restore pairs appear to be called purely for their side
     effect of making the host-side value arrays current (e.g. after GPU updates),
     since a_aij->a / b_aij->a are then accessed directly below — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* Accumulate |a_ij|^2; PetscAbsScalar of the square equals |a_ij|^2 also for complex */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* Combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* Finish the 2-norm: take square roots of the accumulated squares */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* Means divide by the global number of rows */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
264 
265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
266 {
267   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
268   IS              sis,gis;
269   const PetscInt  *isis,*igis;
270   PetscInt        n,*iis,nsis,ngis,rstart,i;
271 
272   PetscFunctionBegin;
273   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
274   PetscCall(MatFindNonzeroRows(a->B,&gis));
275   PetscCall(ISGetSize(gis,&ngis));
276   PetscCall(ISGetSize(sis,&nsis));
277   PetscCall(ISGetIndices(sis,&isis));
278   PetscCall(ISGetIndices(gis,&igis));
279 
280   PetscCall(PetscMalloc1(ngis+nsis,&iis));
281   PetscCall(PetscArraycpy(iis,igis,ngis));
282   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
283   n    = ngis + nsis;
284   PetscCall(PetscSortRemoveDupsInt(&n,iis));
285   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
286   for (i=0; i<n; i++) iis[i] += rstart;
287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
288 
289   PetscCall(ISRestoreIndices(sis,&isis));
290   PetscCall(ISRestoreIndices(gis,&igis));
291   PetscCall(ISDestroy(&sis));
292   PetscCall(ISDestroy(&gis));
293   PetscFunctionReturn(0);
294 }
295 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;  /* number of off-diagonal (ghost) columns on this process */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table variant (scalable): store key+1 -> local+1 so that a lookup result of 0
     can unambiguously mean "global column not present in B" */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* Dense variant (O(N) memory per process): colmap[global] = local+1, and the
     calloc'd zero entries mean "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
322 
/*
   MatSetValues_SeqAIJ_A_Private - insert/add one value at (row,col) of the diagonal block.

   Expects the caller (MatSetValues_MPIAIJ) to have set up: rp1/ap1 (row's column/value
   arrays), nrow1/rmax1 (current/allocated row length), low1/high1/lastcol1 (binary-search
   state reused across consecutive calls for the same row), nonew, ignorezeroentries, and
   the SeqAIJ bookkeeping arrays ai/aj/aa/aimax/ailen.  (orow,ocol) are the original global
   indices, used only in error messages.  Falls through to a_noinsert when the value is
   dropped (zero ignored, or nonew==1); errors when nonew==-1 and a new location is needed.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
359 
/*
   MatSetValues_SeqAIJ_B_Private - insert/add one value at (row,col) of the off-diagonal block.

   Mirrors MatSetValues_SeqAIJ_A_Private but works on the B-block state (rp2/ap2,
   nrow2/rmax2, low2/high2/lastcol2, bi/bj/ba/bimax/bilen).  Note the zero-drop test
   here does not compare row to col: in B the diagonal cannot occur, so every ignored
   zero may be dropped unconditionally.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
395 
/*
   MatSetValuesRow_MPIAIJ - replace the values of one locally owned row, given the
   row's values v[] ordered by GLOBAL column.  The row is stored in three pieces:
   off-diagonal entries left of the diagonal block (in B), the diagonal block (in A),
   then off-diagonal entries right of the diagonal block (back in B).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));  /* first owned row; for square matrices also the first diagonal-block column */
  row  = row - diag;                              /* convert global row index to local */
  /* l = number of B entries whose global column (via garray) precedes the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));   /* first l input values fill the left part of B's row */
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));  /* next chunk fills A's row */
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    /* remaining values fill B's row after the first l entries */
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
433 
434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
435 {
436   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
437   PetscScalar    value = 0.0;
438   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
439   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
440   PetscBool      roworiented = aij->roworiented;
441 
442   /* Some Variables required in the macro */
443   Mat        A                    = aij->A;
444   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
445   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
446   PetscBool  ignorezeroentries    = a->ignorezeroentries;
447   Mat        B                    = aij->B;
448   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
449   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
450   MatScalar  *aa,*ba;
451   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
452   PetscInt   nonew;
453   MatScalar  *ap1,*ap2;
454 
455   PetscFunctionBegin;
456   PetscCall(MatSeqAIJGetArray(A,&aa));
457   PetscCall(MatSeqAIJGetArray(B,&ba));
458   for (i=0; i<m; i++) {
459     if (im[i] < 0) continue;
460     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
461     if (im[i] >= rstart && im[i] < rend) {
462       row      = im[i] - rstart;
463       lastcol1 = -1;
464       rp1      = aj + ai[row];
465       ap1      = aa + ai[row];
466       rmax1    = aimax[row];
467       nrow1    = ailen[row];
468       low1     = 0;
469       high1    = nrow1;
470       lastcol2 = -1;
471       rp2      = bj + bi[row];
472       ap2      = ba + bi[row];
473       rmax2    = bimax[row];
474       nrow2    = bilen[row];
475       low2     = 0;
476       high2    = nrow2;
477 
478       for (j=0; j<n; j++) {
479         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
480         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
481         if (in[j] >= cstart && in[j] < cend) {
482           col   = in[j] - cstart;
483           nonew = a->nonew;
484           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
485         } else if (in[j] < 0) continue;
486         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
487         else {
488           if (mat->was_assembled) {
489             if (!aij->colmap) {
490               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
491             }
492 #if defined(PETSC_USE_CTABLE)
493             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
494             col--;
495 #else
496             col = aij->colmap[in[j]] - 1;
497 #endif
498             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
499               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
500               col  =  in[j];
501               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
502               B        = aij->B;
503               b        = (Mat_SeqAIJ*)B->data;
504               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
505               rp2      = bj + bi[row];
506               ap2      = ba + bi[row];
507               rmax2    = bimax[row];
508               nrow2    = bilen[row];
509               low2     = 0;
510               high2    = nrow2;
511               bm       = aij->B->rmap->n;
512               ba       = b->a;
513             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
514               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
515                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
516               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
517             }
518           } else col = in[j];
519           nonew = b->nonew;
520           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
521         }
522       }
523     } else {
524       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
525       if (!aij->donotstash) {
526         mat->assembled = PETSC_FALSE;
527         if (roworiented) {
528           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
529         } else {
530           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
531         }
532       }
533     }
534   }
535   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
536   PetscCall(MatSeqAIJRestoreArray(B,&ba));
537   PetscFunctionReturn(0);
538 }
539 
540 /*
541     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
542     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
543     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
544 */
545 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
546 {
547   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
548   Mat            A           = aij->A; /* diagonal part of the matrix */
549   Mat            B           = aij->B; /* offdiagonal part of the matrix */
550   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
551   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
552   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
553   PetscInt       *ailen      = a->ilen,*aj = a->j;
554   PetscInt       *bilen      = b->ilen,*bj = b->j;
555   PetscInt       am          = aij->A->rmap->n,j;
556   PetscInt       diag_so_far = 0,dnz;
557   PetscInt       offd_so_far = 0,onz;
558 
559   PetscFunctionBegin;
560   /* Iterate over all rows of the matrix */
561   for (j=0; j<am; j++) {
562     dnz = onz = 0;
563     /*  Iterate over all non-zero columns of the current row */
564     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
565       /* If column is in the diagonal */
566       if (mat_j[col] >= cstart && mat_j[col] < cend) {
567         aj[diag_so_far++] = mat_j[col] - cstart;
568         dnz++;
569       } else { /* off-diagonal entries */
570         bj[offd_so_far++] = mat_j[col];
571         onz++;
572       }
573     }
574     ailen[j] = dnz;
575     bilen[j] = onz;
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 /*
581     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
582     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
583     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
584     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
585     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
586 */
587 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
588 {
589   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
590   Mat            A      = aij->A; /* diagonal part of the matrix */
591   Mat            B      = aij->B; /* offdiagonal part of the matrix */
592   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
593   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
594   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
595   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
596   PetscInt       *ailen = a->ilen,*aj = a->j;
597   PetscInt       *bilen = b->ilen,*bj = b->j;
598   PetscInt       am     = aij->A->rmap->n,j;
599   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
600   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
601   PetscScalar    *aa = a->a,*ba = b->a;
602 
603   PetscFunctionBegin;
604   /* Iterate over all rows of the matrix */
605   for (j=0; j<am; j++) {
606     dnz_row = onz_row = 0;
607     rowstart_offd = full_offd_i[j];
608     rowstart_diag = full_diag_i[j];
609     /*  Iterate over all non-zero columns of the current row */
610     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
611       /* If column is in the diagonal */
612       if (mat_j[col] >= cstart && mat_j[col] < cend) {
613         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
614         aa[rowstart_diag+dnz_row] = mat_a[col];
615         dnz_row++;
616       } else { /* off-diagonal entries */
617         bj[rowstart_offd+onz_row] = mat_j[col];
618         ba[rowstart_offd+onz_row] = mat_a[col];
619         onz_row++;
620       }
621     }
622     ailen[j] = dnz_row;
623     bilen[j] = onz_row;
624   }
625   PetscFunctionReturn(0);
626 }
627 
/*
   MatGetValues_MPIAIJ - retrieve values at (idxm[i], idxn[j]); only locally owned
   rows are supported.  Columns may be anywhere: owned columns are read from the
   diagonal block, others from the off-diagonal block via the colmap lookup, and
   columns with no stored entry on this process return 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;   /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;   /* owned column: read from the diagonal block */
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* Ghost column: translate global column to B's local column via colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));  /* keys/values stored +1-shifted */
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0: column not present in B.  The garray cross-check guards against a
             stale colmap entry pointing at a different global column. */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
666 
667 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
676   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
677   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
678   PetscFunctionReturn(0);
679 }
680 
/* Finish assembly: drain the stash of off-process entries, assemble both sequential
   blocks, and (collectively) handle disassembly/reassembly of the off-diagonal block. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* Receive the entries other ranks stashed for rows we own and insert them locally */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: true only if every rank was previously assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached MatGetRow() workspace is stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale as well */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    /* collective sum so all ranks agree on the new nonzero state */
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
761 
762 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
763 {
764   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
765 
766   PetscFunctionBegin;
767   PetscCall(MatZeroEntries(l->A));
768   PetscCall(MatZeroEntries(l->B));
769   PetscFunctionReturn(0);
770 }
771 
/* Zero the given global rows, optionally placing 'diag' on the diagonal and fixing the
   right-hand side b from x. Collective: all ranks must call with their (possibly empty) row lists. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed: b[i] = diag*x[i] for each zeroed local row i */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember block states to detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent case: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save 'nonew' so the temporary override below can be undone */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entry explicitly; skip rows beyond the global column count */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved insertion policy */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the global state if any rank changed its pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
845 
/* Zero the given global rows AND the corresponding columns, optionally placing 'diag'
   on the diagonal and adjusting b. Uses a PetscSF to route row indices to their owners
   and a scattered mask vector to find the off-diagonal columns that must be cleared. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;   /* local row count; NOTE: reused later as a per-row nnz counter */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  /* build a 0/1 mask over owned columns, then scatter it to ghost positions */
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring ghost values of x over so b can be corrected for zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
964 
965 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
966 {
967   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
968   PetscInt       nt;
969   VecScatter     Mvctx = a->Mvctx;
970 
971   PetscFunctionBegin;
972   PetscCall(VecGetLocalSize(xx,&nt));
973   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
974   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
975   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
976   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
977   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984 
985   PetscFunctionBegin;
986   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
987   PetscFunctionReturn(0);
988 }
989 
990 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
991 {
992   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
993   VecScatter     Mvctx = a->Mvctx;
994 
995   PetscFunctionBegin;
996   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
997   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
998   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
999   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1004 {
1005   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1006 
1007   PetscFunctionBegin;
1008   /* do nondiagonal part */
1009   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1010   /* do local part */
1011   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1012   /* add partial results together */
1013   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1014   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1015   PetscFunctionReturn(0);
1016 }
1017 
/* Test whether Bmat equals Amat^T (within tol). First a cheap collective test on the
   diagonal blocks; only if that passes are the off-diagonal parts gathered and compared. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* all ranks must agree before proceeding (MPI_LAND of the local results) */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0); /* uniprocessor: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range [first,last) */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* extract A(Me,Notme) and B(Notme,Me); these must be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1058 
1059 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1060 {
1061   PetscFunctionBegin;
1062   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1067 {
1068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1069 
1070   PetscFunctionBegin;
1071   /* do nondiagonal part */
1072   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1073   /* do local part */
1074   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1075   /* add partial results together */
1076   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1077   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 /*
1082   This only works correctly for square matrices where the subblock A->A is the
1083    diagonal block
1084 */
1085 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1086 {
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088 
1089   PetscFunctionBegin;
1090   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1091   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1092   PetscCall(MatGetDiagonal(a->A,v));
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   PetscCall(MatScale(a->A,aa));
1102   PetscCall(MatScale(a->B,aa));
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1107 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112   PetscCall(PetscSFDestroy(&aij->coo_sf));
1113   PetscCall(PetscFree(aij->Aperm1));
1114   PetscCall(PetscFree(aij->Bperm1));
1115   PetscCall(PetscFree(aij->Ajmap1));
1116   PetscCall(PetscFree(aij->Bjmap1));
1117 
1118   PetscCall(PetscFree(aij->Aimap2));
1119   PetscCall(PetscFree(aij->Bimap2));
1120   PetscCall(PetscFree(aij->Aperm2));
1121   PetscCall(PetscFree(aij->Bperm2));
1122   PetscCall(PetscFree(aij->Ajmap2));
1123   PetscCall(PetscFree(aij->Bjmap2));
1124 
1125   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1126   PetscCall(PetscFree(aij->Cperm1));
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1131 {
1132   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1133 
1134   PetscFunctionBegin;
1135 #if defined(PETSC_USE_LOG)
1136   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1137 #endif
1138   PetscCall(MatStashDestroy_Private(&mat->stash));
1139   PetscCall(VecDestroy(&aij->diag));
1140   PetscCall(MatDestroy(&aij->A));
1141   PetscCall(MatDestroy(&aij->B));
1142 #if defined(PETSC_USE_CTABLE)
1143   PetscCall(PetscTableDestroy(&aij->colmap));
1144 #else
1145   PetscCall(PetscFree(aij->colmap));
1146 #endif
1147   PetscCall(PetscFree(aij->garray));
1148   PetscCall(VecDestroy(&aij->lvec));
1149   PetscCall(VecScatterDestroy(&aij->Mvctx));
1150   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1151   PetscCall(PetscFree(aij->ld));
1152 
1153   /* Free COO */
1154   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1155 
1156   PetscCall(PetscFree(mat->data));
1157 
1158   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1159   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1160 
1161   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1171 #if defined(PETSC_HAVE_CUDA)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1173 #endif
1174 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1176 #endif
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1178 #if defined(PETSC_HAVE_ELEMENTAL)
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1180 #endif
1181 #if defined(PETSC_HAVE_SCALAPACK)
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1183 #endif
1184 #if defined(PETSC_HAVE_HYPRE)
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1187 #endif
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1194 #if defined(PETSC_HAVE_MKL_SPARSE)
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1196 #endif
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1206 {
1207   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1208   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1209   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1210   const PetscInt    *garray = aij->garray;
1211   const PetscScalar *aa,*ba;
1212   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1213   PetscInt          *rowlens;
1214   PetscInt          *colidxs;
1215   PetscScalar       *matvals;
1216 
1217   PetscFunctionBegin;
1218   PetscCall(PetscViewerSetUp(viewer));
1219 
1220   M  = mat->rmap->N;
1221   N  = mat->cmap->N;
1222   m  = mat->rmap->n;
1223   rs = mat->rmap->rstart;
1224   cs = mat->cmap->rstart;
1225   nz = A->nz + B->nz;
1226 
1227   /* write matrix header */
1228   header[0] = MAT_FILE_CLASSID;
1229   header[1] = M; header[2] = N; header[3] = nz;
1230   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1231   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1232 
1233   /* fill in and store row lengths  */
1234   PetscCall(PetscMalloc1(m,&rowlens));
1235   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1236   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1237   PetscCall(PetscFree(rowlens));
1238 
1239   /* fill in and store column indices */
1240   PetscCall(PetscMalloc1(nz,&colidxs));
1241   for (cnt=0, i=0; i<m; i++) {
1242     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1243       if (garray[B->j[jb]] > cs) break;
1244       colidxs[cnt++] = garray[B->j[jb]];
1245     }
1246     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1247       colidxs[cnt++] = A->j[ja] + cs;
1248     for (; jb<B->i[i+1]; jb++)
1249       colidxs[cnt++] = garray[B->j[jb]];
1250   }
1251   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1252   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1253   PetscCall(PetscFree(colidxs));
1254 
1255   /* fill in and store nonzero values */
1256   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1257   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1258   PetscCall(PetscMalloc1(nz,&matvals));
1259   for (cnt=0, i=0; i<m; i++) {
1260     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1261       if (garray[B->j[jb]] > cs) break;
1262       matvals[cnt++] = ba[jb];
1263     }
1264     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1265       matvals[cnt++] = aa[ja];
1266     for (; jb<B->i[i+1]; jb++)
1267       matvals[cnt++] = ba[jb];
1268   }
1269   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1271   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1272   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1273   PetscCall(PetscFree(matvals));
1274 
1275   /* write block size option to the viewer's .info file */
1276   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #include <petscdraw.h>
/* View dispatcher for ASCII/draw/socket viewers. ASCII info formats are handled in
   place; everything else falls through to gathering the whole matrix on rank 0. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max of the per-rank local nonzero counts */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nz and inode usage */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable -- an ASCII viewer is consumed by the first branch above */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; everyone else requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1408 
1409 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1410 {
1411   PetscBool      iascii,isdraw,issocket,isbinary;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1418   if (iascii || isdraw || isbinary || issocket) {
1419     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
/*
  MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

  Only *local* sweeps (and Eisenstat) are supported: each process relaxes its
  diagonal block A with the off-process coupling B*x folded into the
  right-hand side, which requires scattering xx into mat->lvec before each
  outer iteration.  A globally coupled parallel SOR is not implemented and
  triggers the SETERRQ at the end.

  matin  - the MPIAIJ matrix
  bb     - right-hand side
  omega  - relaxation factor
  flag   - which sweep(s) to perform (MatSORType bitmask)
  fshift - diagonal shift passed through to the sequential SOR
  its    - number of outer (parallel) iterations
  lits   - number of local iterations per outer iteration
  xx     - solution iterate, updated in place
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* work vector holding bb - B*x; allocated only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* Apply only the upper-triangular part of the local diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is unneeded only for a single iteration starting from a zero guess
     (then B*x = 0 and bb can be used directly) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep: x = 0, so there is no ghost contribution to subtract */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather the ghost values of xx needed by the off-diagonal block B */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat trick: one backward sweep into xx, then a forward sweep into
       xx1 with a modified rhs, and add the two results */
    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    /* lazily build and cache the global diagonal, needed below */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero/negative-pivot report from the local factorization */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1522 
/*
  MatPermute_MPIAIJ - forms B = P*A*Q for row permutation rowp (P) and column
  permutation colp (Q).

  Strategy: invert the permutations with PetscSF reduces to learn where each
  locally owned row/column lands globally, translate the compressed
  off-diagonal ("ghost") column indices the same way, precompute exact
  diagonal/off-diagonal nonzero counts per destination row, then fill the new
  matrix with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL; /* NOTE(review): never assigned in this routine, so the ISDestroy(&colp) branch at the end is dead here */
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, for each local source row, how many entries land in the diagonal
     (dnnz) vs off-diagonal (onnz) block of the destination row's owner */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* forward the per-source-row counts to the owners of the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1628 
1629 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1630 {
1631   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1635   if (ghosts) *ghosts = aij->garray;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1640 {
1641   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1642   Mat            A    = mat->A,B = mat->B;
1643   PetscLogDouble isend[5],irecv[5];
1644 
1645   PetscFunctionBegin;
1646   info->block_size = 1.0;
1647   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1648 
1649   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1650   isend[3] = info->memory;  isend[4] = info->mallocs;
1651 
1652   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1653 
1654   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1655   isend[3] += info->memory;  isend[4] += info->mallocs;
1656   if (flag == MAT_LOCAL) {
1657     info->nz_used      = isend[0];
1658     info->nz_allocated = isend[1];
1659     info->nz_unneeded  = isend[2];
1660     info->memory       = isend[3];
1661     info->mallocs      = isend[4];
1662   } else if (flag == MAT_GLOBAL_MAX) {
1663     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1664 
1665     info->nz_used      = irecv[0];
1666     info->nz_allocated = irecv[1];
1667     info->nz_unneeded  = irecv[2];
1668     info->memory       = irecv[3];
1669     info->mallocs      = irecv[4];
1670   } else if (flag == MAT_GLOBAL_SUM) {
1671     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1672 
1673     info->nz_used      = irecv[0];
1674     info->nz_allocated = irecv[1];
1675     info->nz_unneeded  = irecv[2];
1676     info->memory       = irecv[3];
1677     info->mallocs      = irecv[4];
1678   }
1679   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1680   info->fill_ratio_needed = 0;
1681   info->factor_mallocs    = 0;
1682   PetscFunctionReturn(0);
1683 }
1684 
/*
  MatSetOption_MPIAIJ - dispatches a matrix option either to both sequential
  blocks (a->A, a->B), to MPIAIJ-specific state, or deliberately ignores it.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered locally (affects MatSetValues interpretation) and forwarded */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    /* intentionally ignored for this type; just log it */
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process MatSetValues entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1735 
/*
  MatGetRow_MPIAIJ - returns one locally owned row of the matrix as a merged,
  column-sorted list of (global column, value) pairs.

  The row is assembled from the diagonal block A (local column numbering,
  offset by cstart) and the off-diagonal block B (compressed columns mapped to
  global indices via mat->garray).  Since garray is sorted, B's entries split
  into a prefix with global column < cstart and a suffix with column >= cend;
  the output is prefix(B), all of A, suffix(B).  Work arrays are cached on the
  Mat; only one row may be "active" at a time (see MatRestoreRow_MPIAIJ).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* NULL out the pointers the caller did not ask for, so the sequential
     getrow routines skip the corresponding work */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries that precede the A block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point is already known from the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1812 
1813 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1814 {
1815   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1816 
1817   PetscFunctionBegin;
1818   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1819   aij->getrowactive = PETSC_FALSE;
1820   PetscFunctionReturn(0);
1821 }
1822 
/*
  MatNorm_MPIAIJ - computes Frobenius, 1- (max column sum) or infinity-
  (max row sum) norm of an MPIAIJ matrix.

  The single-process case defers to the sequential routine; otherwise the
  contributions of the diagonal (A) and off-diagonal (B) blocks are combined
  and reduced over the communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums (O(N) storage per rank),
         then reduce and take the maximum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps compressed off-diagonal columns to global indices */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so a local max followed by a global max suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1892 
/*
  MatTranspose_MPIAIJ - forms the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place transpose, *matout == A) the result is
  preallocated exactly using an SF reduce of the off-diagonal column counts.
  The diagonal block is transposed locally without MatSetValues; the
  off-diagonal block is scattered with MatSetValues (column-wise inserts with
  global row indices).  For MAT_INPLACE_MATRIX the temporary result is merged
  back into A via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has the transposed sizes: local (na x ma), global (N x M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    /* reusing an existing matrix: its pattern must already accommodate A^T */
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global indices up front */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of B as column (rstart+i) of the transpose */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A and discard the shell */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1980 
/*
  MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr).

  The left scaling is purely local (rows are not distributed across the
  blocks).  The right scaling of the off-diagonal block needs the ghost
  entries of rr, so the scatter is started first and its completion is
  overlapped with the left scaling and the diagonal-block scaling.
  Either ll or rr may be NULL to skip that side.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}
2010 
2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2012 {
2013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2014 
2015   PetscFunctionBegin;
2016   PetscCall(MatSetUnfactored(a->A));
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2021 {
2022   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2023   Mat            a,b,c,d;
2024   PetscBool      flg;
2025 
2026   PetscFunctionBegin;
2027   a = matA->A; b = matA->B;
2028   c = matB->A; d = matB->B;
2029 
2030   PetscCall(MatEqual(a,c,&flg));
2031   if (flg) {
2032     PetscCall(MatEqual(b,d,&flg));
2033   }
2034   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2039 {
2040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2041   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2042 
2043   PetscFunctionBegin;
2044   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2045   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2046     /* because of the column compression in the off-processor part of the matrix a->B,
2047        the number of columns in a->B and b->B may be different, hence we cannot call
2048        the MatCopy() directly on the two parts. If need be, we can provide a more
2049        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2050        then copying the submatrices */
2051     PetscCall(MatCopy_Basic(A,B,str));
2052   } else {
2053     PetscCall(MatCopy(a->A,b->A,str));
2054     PetscCall(MatCopy(a->B,b->B,str));
2055   }
2056   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2057   PetscFunctionReturn(0);
2058 }
2059 
/* MatSetUp_MPIAIJ - default setup: preallocate with PETSC_DEFAULT row lengths */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2066 
2067 /*
2068    Computes the number of nonzeros per row needed for preallocation when X and Y
2069    have different nonzero structure.
2070 */
2071 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2072 {
2073   PetscInt       i,j,k,nzx,nzy;
2074 
2075   PetscFunctionBegin;
2076   /* Set the number of nonzeros in the new matrix */
2077   for (i=0; i<m; i++) {
2078     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2079     nzx = xi[i+1] - xi[i];
2080     nzy = yi[i+1] - yi[i];
2081     nnz[i] = 0;
2082     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2083       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2084       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2085       nnz[i]++;
2086     }
2087     for (; k<nzy; k++) nnz[i]++;
2088   }
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2094 {
2095   PetscInt       m = Y->rmap->N;
2096   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2097   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2098 
2099   PetscFunctionBegin;
2100   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2101   PetscFunctionReturn(0);
2102 }
2103 
/*
  MatAXPY_MPIAIJ - computes Y = a*X + Y.

  With SAME_NONZERO_PATTERN the blocks are updated directly; with
  SUBSET_NONZERO_PATTERN the generic fallback is used; otherwise a new matrix
  with the union sparsity pattern is preallocated exactly, filled, and merged
  into Y via MatHeaderMerge (Y keeps its identity for callers).
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* per-row counts of the union pattern: diagonal blocks compare local
       columns directly, off-diagonal blocks compare via their garrays */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* move B's contents into Y so existing references to Y see the result */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2134 
2135 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2136 
2137 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2138 {
2139   PetscFunctionBegin;
2140   if (PetscDefined(USE_COMPLEX)) {
2141     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2142 
2143     PetscCall(MatConjugate_SeqAIJ(aij->A));
2144     PetscCall(MatConjugate_SeqAIJ(aij->B));
2145   }
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2150 {
2151   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2152 
2153   PetscFunctionBegin;
2154   PetscCall(MatRealPart(a->A));
2155   PetscCall(MatRealPart(a->B));
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2160 {
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2162 
2163   PetscFunctionBegin;
2164   PetscCall(MatImaginaryPart(a->A));
2165   PetscCall(MatImaginaryPart(a->B));
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2170 {
2171   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2172   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2173   PetscScalar       *va,*vv;
2174   Vec               vB,vA;
2175   const PetscScalar *vb;
2176 
2177   PetscFunctionBegin;
2178   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2179   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2180 
2181   PetscCall(VecGetArrayWrite(vA,&va));
2182   if (idx) {
2183     for (i=0; i<m; i++) {
2184       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2185     }
2186   }
2187 
2188   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2189   PetscCall(PetscMalloc1(m,&idxb));
2190   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2191 
2192   PetscCall(VecGetArrayWrite(v,&vv));
2193   PetscCall(VecGetArrayRead(vB,&vb));
2194   for (i=0; i<m; i++) {
2195     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2196       vv[i] = vb[i];
2197       if (idx) idx[i] = a->garray[idxb[i]];
2198     } else {
2199       vv[i] = va[i];
2200       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2201         idx[i] = a->garray[idxb[i]];
2202     }
2203   }
2204   PetscCall(VecRestoreArrayWrite(vA,&vv));
2205   PetscCall(VecRestoreArrayWrite(vA,&va));
2206   PetscCall(VecRestoreArrayRead(vB,&vb));
2207   PetscCall(PetscFree(idxb));
2208   PetscCall(VecDestroy(&vA));
2209   PetscCall(VecDestroy(&vB));
2210   PetscFunctionReturn(0);
2211 }
2212 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, computes the entry of minimum
  absolute value and, if idx is non-NULL, its global column index.

  Rows of the off-diagonal block B are stored with compressed column indices
  (mapped through mat->garray), so a row that is not dense across all
  off-process columns has at least one implicit 0.0 entry; the code below
  searches for the global column of the first such implicit zero.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns at all: every row minimum in absolute value is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the minimum absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons of j (a position within the compressed row)
         against cstart (a global column offset) look suspicious — confirm intent */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller absolute value */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2320 
/*
  MatGetRowMin_MPIAIJ - For each local row, computes the entry of minimum real
  part and, if idx is non-NULL, its global column index.

  Structure mirrors MatGetRowMinAbs_MPIAIJ: the off-diagonal block B is stored
  with compressed columns, so a non-dense B row contains an implicit 0.0 whose
  global column is located first, then explicit entries are scanned.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of min (PETSC_MAX_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons of j (a position within the compressed row)
         against cstart (a global column offset) look suspicious — confirm intent */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2428 
/*
  MatGetRowMax_MPIAIJ - For each local row, computes the entry of maximum real
  part and, if idx is non-NULL, its global column index.

  Structure mirrors MatGetRowMin_MPIAIJ with the comparisons reversed: the
  off-diagonal block B is stored with compressed columns, so a non-dense B row
  contains an implicit 0.0 whose global column is located first, then explicit
  entries are scanned.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity of max (PETSC_MIN_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons of j (a position within the compressed row)
         against cstart (a global column offset) look suspicious — confirm intent */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a larger real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block maxima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2536 
2537 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2538 {
2539   Mat            *dummy;
2540 
2541   PetscFunctionBegin;
2542   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2543   *newmat = *dummy;
2544   PetscCall(PetscFree(dummy));
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2549 {
2550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2551 
2552   PetscFunctionBegin;
2553   PetscCall(MatInvertBlockDiagonal(a->A,values));
2554   A->factorerrortype = a->A->factorerrortype;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2559 {
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2564   PetscCall(MatSetRandom(aij->A,rctx));
2565   if (x->assembled) {
2566     PetscCall(MatSetRandom(aij->B,rctx));
2567   } else {
2568     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2569   }
2570   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2571   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2576 {
2577   PetscFunctionBegin;
2578   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2579   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2580   PetscFunctionReturn(0);
2581 }
2582 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determines whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch via the composed method so that non-MPIAIJ types silently ignore the call */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2601 
2602 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2603 {
2604   PetscBool            sc = PETSC_FALSE,flg;
2605 
2606   PetscFunctionBegin;
2607   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2608   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2609   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2610   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2611   PetscOptionsHeadEnd();
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2616 {
2617   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2618   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2619 
2620   PetscFunctionBegin;
2621   if (!Y->preallocated) {
2622     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2623   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2624     PetscInt nonew = aij->nonew;
2625     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2626     aij->nonew = nonew;
2627   }
2628   PetscCall(MatShift_Basic(Y,a));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2633 {
2634   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2635 
2636   PetscFunctionBegin;
2637   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2638   PetscCall(MatMissingDiagonal(a->A,missing,d));
2639   if (d) {
2640     PetscInt rstart;
2641     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2642     *d += rstart;
2643 
2644   }
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2649 {
2650   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2651 
2652   PetscFunctionBegin;
2653   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ. Entries are positional: the slot
   numbers in the interspersed comments index into struct _MatOps. A NULL slot
   means the operation is unsupported for this type or supplied by the generic
   Mat layer. Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ
};
2809 
2810 /* ----------------------------------------------------------------------------------------*/
2811 
2812 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2813 {
2814   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2815 
2816   PetscFunctionBegin;
2817   PetscCall(MatStoreValues(aij->A));
2818   PetscCall(MatStoreValues(aij->B));
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2823 {
2824   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2825 
2826   PetscFunctionBegin;
2827   PetscCall(MatRetrieveValues(aij->A));
2828   PetscCall(MatRetrieveValues(aij->B));
2829   PetscFunctionReturn(0);
2830 }
2831 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the two sequential blocks of
  an MPIAIJ matrix: d_nz/d_nnz give per-row nonzero counts for the diagonal
  block b->A, o_nz/o_nnz for the off-diagonal block b->B. Any previously built
  column map, ghost array, ghost vector, and scatter context are discarded,
  since the nonzero pattern may change; they are rebuilt at assembly time.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard stale off-process metadata */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process there is no off-diagonal part, so b->B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes across repreallocation, so create it only once */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2875 
2876 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2877 {
2878   Mat_MPIAIJ     *b;
2879 
2880   PetscFunctionBegin;
2881   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2882   PetscCall(PetscLayoutSetUp(B->rmap));
2883   PetscCall(PetscLayoutSetUp(B->cmap));
2884   b = (Mat_MPIAIJ*)B->data;
2885 
2886 #if defined(PETSC_USE_CTABLE)
2887   PetscCall(PetscTableDestroy(&b->colmap));
2888 #else
2889   PetscCall(PetscFree(b->colmap));
2890 #endif
2891   PetscCall(PetscFree(b->garray));
2892   PetscCall(VecDestroy(&b->lvec));
2893   PetscCall(VecScatterDestroy(&b->Mvctx));
2894 
2895   PetscCall(MatResetPreallocation(b->A));
2896   PetscCall(MatResetPreallocation(b->B));
2897   B->preallocated  = PETSC_TRUE;
2898   B->was_assembled = PETSC_FALSE;
2899   B->assembled = PETSC_FALSE;
2900   PetscFunctionReturn(0);
2901 }
2902 
/*
  MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout and
  nonzero structure as matin; values are copied or not according to cpvalues.
  The colmap/garray/lvec/Mvctx communication structures are copied when they
  exist so the duplicate is immediately usable without reassembly.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work arrays are lazily allocated, not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* copy the global-to-local column map used during MatSetValues() */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the compressed-column -> global-column map of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2968 
2969 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2970 {
2971   PetscBool      isbinary, ishdf5;
2972 
2973   PetscFunctionBegin;
2974   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2975   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2976   /* force binary viewer to load .info file if it has not yet done so */
2977   PetscCall(PetscViewerSetUp(viewer));
2978   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2979   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2980   if (isbinary) {
2981     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2982   } else if (ishdf5) {
2983 #if defined(PETSC_HAVE_HDF5)
2984     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2985 #else
2986     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2987 #endif
2988   } else {
2989     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2990   }
2991   PetscFunctionReturn(0);
2992 }
2993 
/*
   MatLoad_MPIAIJ_Binary - reads an MPIAIJ matrix from a PETSc binary viewer.

   Binary layout: a 4-entry header (MAT_FILE_CLASSID, M, N, nz) followed by
   all row lengths, all column indices, and all matrix values. A negative nz
   flags a special on-disk format this loader does not handle.

   Collective on the viewer's communicator.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the local row lengths into a CSR row-offset array */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must equal nz from the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3040 
3041 /* Not scalable because of ISAllGather() unless getting all columns. */
3042 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3043 {
3044   IS             iscol_local;
3045   PetscBool      isstride;
3046   PetscMPIInt    lisstride=0,gisstride;
3047 
3048   PetscFunctionBegin;
3049   /* check if we are grabbing all columns*/
3050   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3051 
3052   if (isstride) {
3053     PetscInt  start,len,mstart,mlen;
3054     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3055     PetscCall(ISGetLocalSize(iscol,&len));
3056     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3057     if (mstart == start && mlen-mstart == len) lisstride = 1;
3058   }
3059 
3060   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3061   if (gisstride) {
3062     PetscInt N;
3063     PetscCall(MatGetSize(mat,NULL,&N));
3064     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3065     PetscCall(ISSetIdentity(iscol_local));
3066     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3067   } else {
3068     PetscInt cbs;
3069     PetscCall(ISGetBlockSize(iscol,&cbs));
3070     PetscCall(ISAllGather(iscol,&iscol_local));
3071     PetscCall(ISSetBlockSize(iscol_local,cbs));
3072   }
3073 
3074   *isseq = iscol_local;
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 /*
3079  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3080  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3081 
3082  Input Parameters:
3083    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3086    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3087            i.e., mat->cstart <= iscol[i] < mat->cend
3088  Output Parameter:
3089    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3090    iscol_o - sequential column index set for retrieving mat->B
3091    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3092  */
/*
   ISGetSeqIS_SameColDist_Private - builds the sequential index sets needed to
   extract a submatrix when isrow/iscol have the same distribution as mat.
   See the block comment above for the meaning of the output parameters.
   The caller owns *garray and must PetscFree() it.
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  /* mark selected columns in x, and record their submatrix column index in cmap */
  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d (takes ownership of idx via PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: local row indices relative to the owned row range */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec still at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  /* idx is copied here (and freed below); cmap1 is handed to the caller as *garray */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3189 
3190 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/*
   MatCreateSubMatrix_MPIAIJ_SameRowColDist - extracts a parallel submatrix
   when isrow and iscol have the same processor distribution as mat.

   MAT_INITIAL_MATRIX builds the submatrix from sequential submatrices of the
   diagonal (a->A) and off-diagonal (a->B) blocks and composes the index sets
   on the result so a later MAT_REUSE_MATRIX call can refresh values in place.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; this consumes Asub and Bsub (Bsub is destroyed inside) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* match the condensed garray of M against the original garray to pick the surviving columns */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3284 
/*
   MatCreateSubMatrix_MPIAIJ - dispatcher for parallel submatrix extraction.

   Chooses among three implementations:
     - SameRowColDist: both isrow and iscol match mat's distribution;
     - SameRowDist: only isrow matches (and the gathered iscol is sorted);
     - nonscalable: the general fallback, which gathers iscol globally.
   On MAT_REUSE_MATRIX the choice is recovered from index sets that the
   initial call composed onto *newmat.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* infer which path the initial call took from the composed index sets */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local isrow indices must lie in this process's row ownership range */
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* both properties must hold on every process for the fast paths */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted: fall through to the general case, reusing iscol_local below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local alive on the submatrix for future MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3388 
3389 /*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.
3392 
3393    Collective
3394 
3395    Input Parameters:
3396 +  comm - MPI communicator
3397 .  A - "diagonal" portion of matrix
3398 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3399 -  garray - global index of B columns
3400 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3404 
3405    Notes:
3406        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3407        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3408 
3409 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3410 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* translate B's local column indices to global indices via garray, in place */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it reuses B's i/j/a arrays directly */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* hand the shared arrays over to Bnew: B must not free them when destroyed */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3477 
3478 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3479 
3480 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3481 {
3482   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3483   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3484   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3485   Mat            M,Msub,B=a->B;
3486   MatScalar      *aa;
3487   Mat_SeqAIJ     *aij;
3488   PetscInt       *garray = a->garray,*colsub,Ncols;
3489   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3490   IS             iscol_sub,iscmap;
3491   const PetscInt *is_idx,*cmap;
3492   PetscBool      allcolumns=PETSC_FALSE;
3493   MPI_Comm       comm;
3494 
3495   PetscFunctionBegin;
3496   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3497   if (call == MAT_REUSE_MATRIX) {
3498     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3499     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3500     PetscCall(ISGetLocalSize(iscol_sub,&count));
3501 
3502     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3503     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3504 
3505     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3506     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3507 
3508     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3509 
3510   } else { /* call == MAT_INITIAL_MATRIX) */
3511     PetscBool flg;
3512 
3513     PetscCall(ISGetLocalSize(iscol,&n));
3514     PetscCall(ISGetSize(iscol,&Ncols));
3515 
3516     /* (1) iscol -> nonscalable iscol_local */
3517     /* Check for special case: each processor gets entire matrix columns */
3518     PetscCall(ISIdentity(iscol_local,&flg));
3519     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3520     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3521     if (allcolumns) {
3522       iscol_sub = iscol_local;
3523       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3524       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3525 
3526     } else {
3527       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3528       PetscInt *idx,*cmap1,k;
3529       PetscCall(PetscMalloc1(Ncols,&idx));
3530       PetscCall(PetscMalloc1(Ncols,&cmap1));
3531       PetscCall(ISGetIndices(iscol_local,&is_idx));
3532       count = 0;
3533       k     = 0;
3534       for (i=0; i<Ncols; i++) {
3535         j = is_idx[i];
3536         if (j >= cstart && j < cend) {
3537           /* diagonal part of mat */
3538           idx[count]     = j;
3539           cmap1[count++] = i; /* column index in submat */
3540         } else if (Bn) {
3541           /* off-diagonal part of mat */
3542           if (j == garray[k]) {
3543             idx[count]     = j;
3544             cmap1[count++] = i;  /* column index in submat */
3545           } else if (j > garray[k]) {
3546             while (j > garray[k] && k < Bn-1) k++;
3547             if (j == garray[k]) {
3548               idx[count]     = j;
3549               cmap1[count++] = i; /* column index in submat */
3550             }
3551           }
3552         }
3553       }
3554       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3555 
3556       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3557       PetscCall(ISGetBlockSize(iscol,&cbs));
3558       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3559 
3560       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3561     }
3562 
3563     /* (3) Create sequential Msub */
3564     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3565   }
3566 
3567   PetscCall(ISGetLocalSize(iscol_sub,&count));
3568   aij  = (Mat_SeqAIJ*)(Msub)->data;
3569   ii   = aij->i;
3570   PetscCall(ISGetIndices(iscmap,&cmap));
3571 
3572   /*
3573       m - number of local rows
3574       Ncols - number of columns (same on all processors)
3575       rstart - first row in new global matrix generated
3576   */
3577   PetscCall(MatGetSize(Msub,&m,NULL));
3578 
3579   if (call == MAT_INITIAL_MATRIX) {
3580     /* (4) Create parallel newmat */
3581     PetscMPIInt    rank,size;
3582     PetscInt       csize;
3583 
3584     PetscCallMPI(MPI_Comm_size(comm,&size));
3585     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3586 
3587     /*
3588         Determine the number of non-zeros in the diagonal and off-diagonal
3589         portions of the matrix in order to do correct preallocation
3590     */
3591 
3592     /* first get start and end of "diagonal" columns */
3593     PetscCall(ISGetLocalSize(iscol,&csize));
3594     if (csize == PETSC_DECIDE) {
3595       PetscCall(ISGetSize(isrow,&mglobal));
3596       if (mglobal == Ncols) { /* square matrix */
3597         nlocal = m;
3598       } else {
3599         nlocal = Ncols/size + ((Ncols % size) > rank);
3600       }
3601     } else {
3602       nlocal = csize;
3603     }
3604     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3605     rstart = rend - nlocal;
3606     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3607 
3608     /* next, compute all the lengths */
3609     jj    = aij->j;
3610     PetscCall(PetscMalloc1(2*m+1,&dlens));
3611     olens = dlens + m;
3612     for (i=0; i<m; i++) {
3613       jend = ii[i+1] - ii[i];
3614       olen = 0;
3615       dlen = 0;
3616       for (j=0; j<jend; j++) {
3617         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3618         else dlen++;
3619         jj++;
3620       }
3621       olens[i] = olen;
3622       dlens[i] = dlen;
3623     }
3624 
3625     PetscCall(ISGetBlockSize(isrow,&bs));
3626     PetscCall(ISGetBlockSize(iscol,&cbs));
3627 
3628     PetscCall(MatCreate(comm,&M));
3629     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3630     PetscCall(MatSetBlockSizes(M,bs,cbs));
3631     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3632     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3633     PetscCall(PetscFree(dlens));
3634 
3635   } else { /* call == MAT_REUSE_MATRIX */
3636     M    = *newmat;
3637     PetscCall(MatGetLocalSize(M,&i,NULL));
3638     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3639     PetscCall(MatZeroEntries(M));
3640     /*
3641          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3642        rather than the slower MatSetValues().
3643     */
3644     M->was_assembled = PETSC_TRUE;
3645     M->assembled     = PETSC_FALSE;
3646   }
3647 
3648   /* (5) Set values of Msub to *newmat */
3649   PetscCall(PetscMalloc1(count,&colsub));
3650   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3651 
3652   jj   = aij->j;
3653   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3654   for (i=0; i<m; i++) {
3655     row = rstart + i;
3656     nz  = ii[i+1] - ii[i];
3657     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3658     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3659     jj += nz; aa += nz;
3660   }
3661   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3662   PetscCall(ISRestoreIndices(iscmap,&cmap));
3663 
3664   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3665   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3666 
3667   PetscCall(PetscFree(colsub));
3668 
3669   /* save Msub, iscol_sub and iscmap used in processor for next request */
3670   if (call == MAT_INITIAL_MATRIX) {
3671     *newmat = M;
3672     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3673     PetscCall(MatDestroy(&Msub));
3674 
3675     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3676     PetscCall(ISDestroy(&iscol_sub));
3677 
3678     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3679     PetscCall(ISDestroy(&iscmap));
3680 
3681     if (iscol_local) {
3682       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3683       PetscCall(ISDestroy(&iscol_local));
3684     }
3685   }
3686   PetscFunctionReturn(0);
3687 }
3688 
3689 /*
    Not great since it makes two copies of the submatrix, first a SeqAIJ
  locally and then the end result by concatenating the local matrices.
3692   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3693 
3694   Note: This requires a sequential iscol with all indices.
3695 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns fast path is only valid when it holds on every rank, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* retrieve the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts gives this rank's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;   /* olens shares the single allocation with dlens; only dlens is freed */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* classify each entry of row i: inside [rstart,rend) -> diagonal block, else off-diagonal */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert the rows of the sequential submatrix; every row is locally owned,
     so the type-specific MatSetValues_MPIAIJ() can be called directly */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3820 
3821 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3822 {
3823   PetscInt       m,cstart, cend,j,nnz,i,d;
3824   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3825   const PetscInt *JJ;
3826   PetscBool      nooffprocentries;
3827 
3828   PetscFunctionBegin;
3829   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3830 
3831   PetscCall(PetscLayoutSetUp(B->rmap));
3832   PetscCall(PetscLayoutSetUp(B->cmap));
3833   m      = B->rmap->n;
3834   cstart = B->cmap->rstart;
3835   cend   = B->cmap->rend;
3836   rstart = B->rmap->rstart;
3837 
3838   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3839 
3840   if (PetscDefined(USE_DEBUG)) {
3841     for (i=0; i<m; i++) {
3842       nnz = Ii[i+1]- Ii[i];
3843       JJ  = J + Ii[i];
3844       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3845       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3846       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3847     }
3848   }
3849 
3850   for (i=0; i<m; i++) {
3851     nnz     = Ii[i+1]- Ii[i];
3852     JJ      = J + Ii[i];
3853     nnz_max = PetscMax(nnz_max,nnz);
3854     d       = 0;
3855     for (j=0; j<nnz; j++) {
3856       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3857     }
3858     d_nnz[i] = d;
3859     o_nnz[i] = nnz - d;
3860   }
3861   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3862   PetscCall(PetscFree2(d_nnz,o_nnz));
3863 
3864   for (i=0; i<m; i++) {
3865     ii   = i + rstart;
3866     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3867   }
3868   nooffprocentries    = B->nooffprocentries;
3869   B->nooffprocentries = PETSC_TRUE;
3870   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3871   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3872   B->nooffprocentries = nooffprocentries;
3873 
3874   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 /*@
3879    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3880    (the default parallel PETSc format).
3881 
3882    Collective
3883 
3884    Input Parameters:
3885 +  B - the matrix
3886 .  i - the indices into j for the start of each local row (starts with zero)
3887 .  j - the column indices for each local row (starts with zero)
3888 -  v - optional values in the matrix
3889 
3890    Level: developer
3891 
3892    Notes:
3893        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3894      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3895      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3896 
3897        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3898 
3899        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3901     as shown
3902 
3903 $        1 0 0
3904 $        2 0 3     P0
3905 $       -------
3906 $        4 5 6     P1
3907 $
3908 $     Process0 [P0]: rows_owned=[0,1]
3909 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3910 $        j =  {0,0,2}  [size = 3]
3911 $        v =  {1,2,3}  [size = 3]
3912 $
3913 $     Process1 [P1]: rows_owned=[2]
3914 $        i =  {0,3}    [size = nrow+1  = 1+1]
3915 $        j =  {0,1,2}  [size = 3]
3916 $        v =  {4,5,6}  [size = 3]
3917 
3918 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3919           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3920 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the matrix-type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ
     for MATMPIAIJ); PetscTryMethod() is a no-op for types that do not provide the method */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3927 
3928 /*@C
3929    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3930    (the default parallel PETSc format).  For good matrix assembly performance
3931    the user should preallocate the matrix storage by setting the parameters
3932    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3933    performance can be increased by more than a factor of 50.
3934 
3935    Collective
3936 
3937    Input Parameters:
3938 +  B - the matrix
3939 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3940            (same value is used for all local rows)
3941 .  d_nnz - array containing the number of nonzeros in the various rows of the
3942            DIAGONAL portion of the local submatrix (possibly different for each row)
3943            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3944            The size of this array is equal to the number of local rows, i.e 'm'.
3945            For matrices that will be factored, you must leave room for (and set)
3946            the diagonal entry even if it is zero.
3947 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3948            submatrix (same value is used for all local rows).
3949 -  o_nnz - array containing the number of nonzeros in the various rows of the
3950            OFF-DIAGONAL portion of the local submatrix (possibly different for
3951            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3952            structure. The size of this array is equal to the number
3953            of local rows, i.e 'm'.
3954 
3955    If the *_nnz parameter is given then the *_nz parameter is ignored
3956 
3957    The AIJ format (also called the Yale sparse matrix format or
3958    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3959    storage.  The stored row and column indices begin with zero.
3960    See Users-Manual: ch_mat for details.
3961 
3962    The parallel matrix is partitioned such that the first m0 rows belong to
3963    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3964    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3965 
3966    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
3971    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3972    common case of a square matrix, the row and column ranges are the same and
3973    the DIAGONAL part is also square. The remaining portion of the local
3974    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3975 
3976    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3977 
3978    You can call MatGetInfo() to get information on how effective the preallocation was;
3979    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3980    You can also run with the option -info and look for messages with the string
3981    malloc in them to see if additional memory allocation was needed.
3982 
3983    Example usage:
3984 
3985    Consider the following 8x8 matrix with 34 non-zero values, that is
3986    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3987    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3988    as follows:
3989 
3990 .vb
3991             1  2  0  |  0  3  0  |  0  4
3992     Proc0   0  5  6  |  7  0  0  |  8  0
3993             9  0 10  | 11  0  0  | 12  0
3994     -------------------------------------
3995            13  0 14  | 15 16 17  |  0  0
3996     Proc1   0 18  0  | 19 20 21  |  0  0
3997             0  0  0  | 22 23  0  | 24  0
3998     -------------------------------------
3999     Proc2  25 26 27  |  0  0 28  | 29  0
4000            30  0  0  | 31 32 33  |  0 34
4001 .ve
4002 
4003    This can be represented as a collection of submatrices as:
4004 
4005 .vb
4006       A B C
4007       D E F
4008       G H I
4009 .ve
4010 
4011    Where the submatrices A,B,C are owned by proc0, D,E,F are
4012    owned by proc1, G,H,I are owned by proc2.
4013 
4014    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4015    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4016    The 'M','N' parameters are 8,8, and have the same values on all procs.
4017 
4018    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4019    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4020    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4021    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4024 
4025    When d_nz, o_nz parameters are specified, d_nz storage elements are
4026    allocated for every row of the local diagonal submatrix, and o_nz
4027    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4029    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4030    In this case, the values of d_nz,o_nz are:
4031 .vb
4032      proc0 : dnz = 2, o_nz = 2
4033      proc1 : dnz = 3, o_nz = 2
4034      proc2 : dnz = 1, o_nz = 4
4035 .ve
4036    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4037    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4039    34 values.
4040 
4041    When d_nnz, o_nnz parameters are specified, the storage is specified
4042    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4043    In the above case the values for d_nnz,o_nnz are:
4044 .vb
4045      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4046      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4047      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4048 .ve
4049    Here the space allocated is sum of all the above values i.e 34, and
4050    hence pre-allocation is perfect.
4051 
4052    Level: intermediate
4053 
4054 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4055           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4056 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);  /* B must be a valid Mat object */
  PetscValidType(B,1);                        /* and its type must already be set */
  /* Dispatch to the matrix-type-specific implementation; a no-op for types without the method */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4065 
4066 /*@
4067      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4068          CSR format for the local rows.
4069 
4070    Collective
4071 
4072    Input Parameters:
4073 +  comm - MPI communicator
4074 .  m - number of local rows (Cannot be PETSC_DECIDE)
4075 .  n - This value should be the same as the local size used in creating the
4076        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4077        calculated if N is given) For square matrices n is almost always m.
4078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4080 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4081 .   j - column indices
4082 -   a - matrix values
4083 
4084    Output Parameter:
4085 .   mat - the matrix
4086 
4087    Level: intermediate
4088 
4089    Notes:
4090        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4091      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4092      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4093 
4094        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4095 
4096        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4098     as shown
4099 
4100        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4101 
4102 $        1 0 0
4103 $        2 0 3     P0
4104 $       -------
4105 $        4 5 6     P1
4106 $
4107 $     Process0 [P0]: rows_owned=[0,1]
4108 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4109 $        j =  {0,0,2}  [size = 3]
4110 $        v =  {1,2,3}  [size = 3]
4111 $
4112 $     Process1 [P1]: rows_owned=[2]
4113 $        i =  {0,3}    [size = nrow+1  = 1+1]
4114 $        j =  {0,1,2}  [size = 3]
4115 $        v =  {4,5,6}  [size = 3]
4116 
4117 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4118           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4119 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* i == NULL passes this check; presumably the preallocation routine below then ignores
     the CSR arrays — TODO confirm against MatMPIAIJSetPreallocationCSR_MPIAIJ */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* copies i, j, a into the matrix's internal storage and assembles it */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4132 
4133 /*@
4134      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4135          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4136 
4137    Collective
4138 
4139    Input Parameters:
4140 +  mat - the matrix
4141 .  m - number of local rows (Cannot be PETSC_DECIDE)
4142 .  n - This value should be the same as the local size used in creating the
4143        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4144        calculated if N is given) For square matrices n is almost always m.
4145 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4146 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4147 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4148 .  J - column indices
4149 -  v - matrix values
4150 
4151    Level: intermediate
4152 
4153 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4154           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4155 @*/
4156 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4157 {
4158   PetscInt       cstart,nnz,i,j;
4159   PetscInt       *ld;
4160   PetscBool      nooffprocentries;
4161   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4162   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4163   PetscScalar    *ad,*ao;
4164   const PetscInt *Adi = Ad->i;
4165   PetscInt       ldi,Iii,md;
4166 
4167   PetscFunctionBegin;
4168   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4169   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4170   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4171   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4172 
4173   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4174   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4175   cstart = mat->cmap->rstart;
4176   if (!Aij->ld) {
4177     /* count number of entries below block diagonal */
4178     PetscCall(PetscCalloc1(m,&ld));
4179     Aij->ld = ld;
4180     for (i=0; i<m; i++) {
4181       nnz  = Ii[i+1]- Ii[i];
4182       j     = 0;
4183       while  (J[j] < cstart && j < nnz) {j++;}
4184       J    += nnz;
4185       ld[i] = j;
4186     }
4187   } else {
4188     ld = Aij->ld;
4189   }
4190 
4191   for (i=0; i<m; i++) {
4192     nnz  = Ii[i+1]- Ii[i];
4193     Iii  = Ii[i];
4194     ldi  = ld[i];
4195     md   = Adi[i+1]-Adi[i];
4196     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4197     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4198     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4199     ad  += md;
4200     ao  += nnz - md;
4201   }
4202   nooffprocentries      = mat->nooffprocentries;
4203   mat->nooffprocentries = PETSC_TRUE;
4204   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4205   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4206   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4207   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4208   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4209   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4210   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4211   mat->nooffprocentries = nooffprocentries;
4212   PetscFunctionReturn(0);
4213 }
4214 
4215 /*@C
4216    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4217    (the default parallel PETSc format).  For good matrix assembly performance
4218    the user should preallocate the matrix storage by setting the parameters
4219    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4220    performance can be increased by more than a factor of 50.
4221 
4222    Collective
4223 
4224    Input Parameters:
4225 +  comm - MPI communicator
4226 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4227            This value should be the same as the local size used in creating the
4228            y vector for the matrix-vector product y = Ax.
4229 .  n - This value should be the same as the local size used in creating the
4230        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4231        calculated if N is given) For square matrices n is almost always m.
4232 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4233 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4234 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4235            (same value is used for all local rows)
4236 .  d_nnz - array containing the number of nonzeros in the various rows of the
4237            DIAGONAL portion of the local submatrix (possibly different for each row)
4238            or NULL, if d_nz is used to specify the nonzero structure.
4239            The size of this array is equal to the number of local rows, i.e 'm'.
4240 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4241            submatrix (same value is used for all local rows).
4242 -  o_nnz - array containing the number of nonzeros in the various rows of the
4243            OFF-DIAGONAL portion of the local submatrix (possibly different for
4244            each row) or NULL, if o_nz is used to specify the nonzero
4245            structure. The size of this array is equal to the number
4246            of local rows, i.e 'm'.
4247 
4248    Output Parameter:
4249 .  A - the matrix
4250 
4251    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4252    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4253    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4254 
4255    Notes:
4256    If the *_nnz parameter is given then the *_nz parameter is ignored
4257 
4258    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4259    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4260    storage requirements for this matrix.
4261 
4262    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4263    processor than it must be used on all processors that share the object for
4264    that argument.
4265 
4266    The user MUST specify either the local or global matrix dimensions
4267    (possibly both).
4268 
4269    The parallel matrix is partitioned across processors such that the
4270    first m0 rows belong to process 0, the next m1 rows belong to
4271    process 1, the next m2 rows belong to process 2 etc.. where
4272    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4273    values corresponding to [m x N] submatrix.
4274 
4275    The columns are logically partitioned with the n0 columns belonging
4276    to 0th partition, the next n1 columns belonging to the next
4277    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4278 
4279    The DIAGONAL portion of the local submatrix on any given processor
4280    is the submatrix corresponding to the rows and columns m,n
4281    corresponding to the given processor. i.e diagonal matrix on
4282    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4283    etc. The remaining portion of the local submatrix [m x (N-n)]
4284    constitute the OFF-DIAGONAL portion. The example below better
4285    illustrates this concept.
4286 
4287    For a square global matrix we define each processor's diagonal portion
4288    to be its local rows and the corresponding columns (a square submatrix);
4289    each processor's off-diagonal portion encompasses the remainder of the
4290    local matrix (a rectangular submatrix).
4291 
4292    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4293 
4294    When calling this routine with a single process communicator, a matrix of
4295    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4296    type of communicator, use the construction mechanism
4297 .vb
4298      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4299 .ve
4300 
4301 $     MatCreate(...,&A);
4302 $     MatSetType(A,MATMPIAIJ);
4303 $     MatSetSizes(A, m,n,M,N);
4304 $     MatMPIAIJSetPreallocation(A,...);
4305 
4306    By default, this format uses inodes (identical nodes) when possible.
4307    We search for consecutive rows with the same nonzero structure, thereby
4308    reusing matrix information to achieve increased efficiency.
4309 
4310    Options Database Keys:
4311 +  -mat_no_inode  - Do not use inodes
4312 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4313 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4314         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4315         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4316 
4317    Example usage:
4318 
4319    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4321    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4322    as follows
4323 
4324 .vb
4325             1  2  0  |  0  3  0  |  0  4
4326     Proc0   0  5  6  |  7  0  0  |  8  0
4327             9  0 10  | 11  0  0  | 12  0
4328     -------------------------------------
4329            13  0 14  | 15 16 17  |  0  0
4330     Proc1   0 18  0  | 19 20 21  |  0  0
4331             0  0  0  | 22 23  0  | 24  0
4332     -------------------------------------
4333     Proc2  25 26 27  |  0  0 28  | 29  0
4334            30  0  0  | 31 32 33  |  0 34
4335 .ve
4336 
4337    This can be represented as a collection of submatrices as
4338 
4339 .vb
4340       A B C
4341       D E F
4342       G H I
4343 .ve
4344 
4345    Where the submatrices A,B,C are owned by proc0, D,E,F are
4346    owned by proc1, G,H,I are owned by proc2.
4347 
4348    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4349    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4350    The 'M','N' parameters are 8,8, and have the same values on all procs.
4351 
4352    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4353    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4354    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4355    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4356    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4358 
4359    When d_nz, o_nz parameters are specified, d_nz storage elements are
4360    allocated for every row of the local diagonal submatrix, and o_nz
4361    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4364    In this case, the values of d_nz,o_nz are
4365 .vb
4366      proc0 : dnz = 2, o_nz = 2
4367      proc1 : dnz = 3, o_nz = 2
4368      proc2 : dnz = 1, o_nz = 4
4369 .ve
4370    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4371    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4373    34 values.
4374 
4375    When d_nnz, o_nnz parameters are specified, the storage is specified
4376    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4377    In the above case the values for d_nnz,o_nnz are
4378 .vb
4379      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4380      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4381      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4382 .ve
4383    Here the space allocated is sum of all the above values i.e 34, and
4384    hence pre-allocation is perfect.
4385 
4386    Level: intermediate
4387 
4388 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4389           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4390 @*/
4391 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4392 {
4393   PetscMPIInt    size;
4394 
4395   PetscFunctionBegin;
4396   PetscCall(MatCreate(comm,A));
4397   PetscCall(MatSetSizes(*A,m,n,M,N));
4398   PetscCallMPI(MPI_Comm_size(comm,&size));
4399   if (size > 1) {
4400     PetscCall(MatSetType(*A,MATMPIAIJ));
4401     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4402   } else {
4403     PetscCall(MatSetType(*A,MATSEQAIJ));
4404     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4405   }
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 /*@C
4410   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4411 
4412   Not collective
4413 
4414   Input Parameter:
4415 . A - The MPIAIJ matrix
4416 
4417   Output Parameters:
4418 + Ad - The local diagonal block as a SeqAIJ matrix
4419 . Ao - The local off-diagonal block as a SeqAIJ matrix
4420 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4421 
4422   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4424   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4425   local column numbers to global column numbers in the original matrix.
4426 
4427   Level: intermediate
4428 
4429 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4430 @*/
4431 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4432 {
4433   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4434   PetscBool      flg;
4435 
4436   PetscFunctionBegin;
4437   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4438   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4439   if (Ad)     *Ad     = a->A;
4440   if (Ao)     *Ao     = a->B;
4441   if (colmap) *colmap = a->garray;
4442   PetscFunctionReturn(0);
4443 }
4444 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - builds (MAT_INITIAL_MATRIX) or refills
   (MAT_REUSE_MATRIX) a parallel AIJ matrix whose local rows are the rows of this
   process's sequential matrix inmat, i.e. the row-wise concatenation across comm.

   comm   - communicator for the parallel matrix
   inmat  - this process's sequential matrix (MatGetRow_SeqAIJ() is used directly,
            so inmat must use the SeqAIJ data layout)
   n      - number of local columns of *outmat, or PETSC_DECIDE
   scall  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   outmat - the resulting parallel matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first global row owned by this process: exclusive prefix sum of local row counts */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per local row for preallocation */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    /* pick the root type matching inmat; both preallocations below are issued and
       the one not matching the actual type is presumably a no-op — standard PETSc idiom */
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    /* every process inserts only rows it owns, so off-process stashing is unnecessary */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4498 
4499 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4500 {
4501   PetscMPIInt       rank;
4502   PetscInt          m,N,i,rstart,nnz;
4503   size_t            len;
4504   const PetscInt    *indx;
4505   PetscViewer       out;
4506   char              *name;
4507   Mat               B;
4508   const PetscScalar *values;
4509 
4510   PetscFunctionBegin;
4511   PetscCall(MatGetLocalSize(A,&m,NULL));
4512   PetscCall(MatGetSize(A,NULL,&N));
4513   /* Should this be the type of the diagonal block of A? */
4514   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4515   PetscCall(MatSetSizes(B,m,N,m,N));
4516   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4517   PetscCall(MatSetType(B,MATSEQAIJ));
4518   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4519   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4520   for (i=0; i<m; i++) {
4521     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4522     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4523     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4524   }
4525   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4526   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4527 
4528   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4529   PetscCall(PetscStrlen(outfile,&len));
4530   PetscCall(PetscMalloc1(len+6,&name));
4531   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4532   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4533   PetscCall(PetscFree(name));
4534   PetscCall(MatView(B,out));
4535   PetscCall(PetscViewerDestroy(&out));
4536   PetscCall(MatDestroy(&B));
4537   PetscFunctionReturn(0);
4538 }
4539 
4540 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4541 {
4542   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4543 
4544   PetscFunctionBegin;
4545   if (!merge) PetscFunctionReturn(0);
4546   PetscCall(PetscFree(merge->id_r));
4547   PetscCall(PetscFree(merge->len_s));
4548   PetscCall(PetscFree(merge->len_r));
4549   PetscCall(PetscFree(merge->bi));
4550   PetscCall(PetscFree(merge->bj));
4551   PetscCall(PetscFree(merge->buf_ri[0]));
4552   PetscCall(PetscFree(merge->buf_ri));
4553   PetscCall(PetscFree(merge->buf_rj[0]));
4554   PetscCall(PetscFree(merge->buf_rj));
4555   PetscCall(PetscFree(merge->coi));
4556   PetscCall(PetscFree(merge->coj));
4557   PetscCall(PetscFree(merge->owners_co));
4558   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4559   PetscCall(PetscFree(merge));
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 #include <../src/mat/utils/freespace.h>
4564 #include <petscbt.h>
4565 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - numeric phase of MatCreateMPIAIJSumSeqAIJ().
   Sums the numerical values of the per-process sequential matrices seqmat into
   the parallel matrix mpimat, reusing the communication pattern and merged ij
   structure (Mat_Merge_SeqsToMPI) attached by MatCreateMPIAIJSumSeqAIJSymbolic().
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge structure produced by the symbolic phase; error out if absent */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  /* each message is the contiguous run of values of the rows owned by [proc] */
  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* bj_i is a superset of aj (sorted), so a single merge-scan matches every entry */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] owns the contiguous receive storage (allocated by PetscPostIrecvScalar) */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4685 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ().
   Determines the nonzero structure of the parallel sum of the per-process sequential
   matrices, creates the (unassembled) parallel matrix, and attaches a
   Mat_Merge_SeqsToMPI container holding the communication pattern and merged ij
   structure for reuse by MatCreateMPIAIJSumSeqAIJNumeric().
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  /* NOTE: this intentionally shadows the comm parameter; the duplicate is released
     by PetscCommDestroy() near the end of this function */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* this rank's own rows are merged locally, not sent */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; the i-structure message skips empty ones */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++; /* counts how often the free space had to be grown */
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  /* flatten the accumulated column indices into the final bj array */
  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4935 
4936 /*@C
4937       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4938                  matrices from each processor
4939 
4940     Collective
4941 
4942    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4944 .    seqmat - the input sequential matrices
4945 .    m - number of local rows (or PETSC_DECIDE)
4946 .    n - number of local columns (or PETSC_DECIDE)
4947 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4948 
4949    Output Parameter:
4950 .    mpimat - the parallel matrix generated
4951 
4952     Level: advanced
4953 
4954    Notes:
4955      The dimensions of the sequential matrix in each processor MUST be the same.
4956      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4957      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4958 @*/
4959 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4960 {
4961   PetscMPIInt    size;
4962 
4963   PetscFunctionBegin;
4964   PetscCallMPI(MPI_Comm_size(comm,&size));
4965   if (size == 1) {
4966     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4967     if (scall == MAT_INITIAL_MATRIX) {
4968       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4969     } else {
4970       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4971     }
4972     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4973     PetscFunctionReturn(0);
4974   }
4975   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4976   if (scall == MAT_INITIAL_MATRIX) {
4977     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4978   }
4979   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4980   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4981   PetscFunctionReturn(0);
4982 }
4983 
4984 /*@
4985      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4986           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4987           with MatGetSize()
4988 
4989     Not Collective
4990 
   Input Parameter:
.    A - the matrix
4994 
4995    Output Parameter:
4996 .    A_loc - the local sequential matrix generated
4997 
4998     Level: developer
4999 
5000    Notes:
5001      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5002 
5003      Destroy the matrix with MatDestroy()
5004 
.seealso: `MatMPIAIJGetLocalMat()`
5006 
5007 @*/
5008 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5009 {
5010   PetscBool      mpi;
5011 
5012   PetscFunctionBegin;
5013   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5014   if (mpi) {
5015     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5016   } else {
5017     *A_loc = A;
5018     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5019   }
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 /*@
5024      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5025           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5026           with MatGetSize()
5027 
5028     Not Collective
5029 
5030    Input Parameters:
5031 +    A - the matrix
5032 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5033 
5034    Output Parameter:
5035 .    A_loc - the local sequential matrix generated
5036 
5037     Level: developer
5038 
5039    Notes:
5040      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5041 
5042      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5043      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5044      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5045      modify the values of the returned A_loc.
5046 
5047 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5048 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  /* cmap[] maps local column indices of the off-diagonal block B to global column indices */
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  /* aav/bav hold the base pointers returned by the Get routines (needed for the Restore
     calls); aa/ba are roving copies advanced while walking the value arrays */
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Accept MATMPIAIJ and its derived types (e.g. device subclasses), hence a prefix match */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Uniprocessor case: the diagonal block IS the whole matrix; return/copy it directly */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal plus off-diagonal entries */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* Merge each row keeping global column indices sorted:
       off-diagonal entries left of the owned block, then the diagonal block, then the rest */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shifted to global column numbering) */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart+n) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Nonzero pattern is fixed; only refresh the values, walking rows in the same merged order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A with global column < cstart */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  /* Restore with the untouched base pointers aav/bav, not the advanced aa/ba */
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5149 
5150 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5153 
5154     Not Collective
5155 
5156    Input Parameters:
5157 +    A - the matrix
5158 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5159 
5160    Output Parameters:
5161 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5162 -    A_loc - the local sequential matrix generated
5163 
5164     Level: developer
5165 
5166    Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5168 
5169 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5170 
5171 @*/
5172 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5173 {
5174   Mat            Ao,Ad;
5175   const PetscInt *cmap;
5176   PetscMPIInt    size;
5177   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5178 
5179   PetscFunctionBegin;
5180   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5181   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5182   if (size == 1) {
5183     if (scall == MAT_INITIAL_MATRIX) {
5184       PetscCall(PetscObjectReference((PetscObject)Ad));
5185       *A_loc = Ad;
5186     } else if (scall == MAT_REUSE_MATRIX) {
5187       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5188     }
5189     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5190     PetscFunctionReturn(0);
5191   }
5192   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5193   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5194   if (f) {
5195     PetscCall((*f)(A,scall,glob,A_loc));
5196   } else {
5197     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5198     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5199     Mat_SeqAIJ        *c;
5200     PetscInt          *ai = a->i, *aj = a->j;
5201     PetscInt          *bi = b->i, *bj = b->j;
5202     PetscInt          *ci,*cj;
5203     const PetscScalar *aa,*ba;
5204     PetscScalar       *ca;
5205     PetscInt          i,j,am,dn,on;
5206 
5207     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5208     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5209     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5210     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5211     if (scall == MAT_INITIAL_MATRIX) {
5212       PetscInt k;
5213       PetscCall(PetscMalloc1(1+am,&ci));
5214       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5215       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5216       ci[0] = 0;
5217       for (i=0,k=0; i<am; i++) {
5218         const PetscInt ncols_o = bi[i+1] - bi[i];
5219         const PetscInt ncols_d = ai[i+1] - ai[i];
5220         ci[i+1] = ci[i] + ncols_o + ncols_d;
5221         /* diagonal portion of A */
5222         for (j=0; j<ncols_d; j++,k++) {
5223           cj[k] = *aj++;
5224           ca[k] = *aa++;
5225         }
5226         /* off-diagonal portion of A */
5227         for (j=0; j<ncols_o; j++,k++) {
5228           cj[k] = dn + *bj++;
5229           ca[k] = *ba++;
5230         }
5231       }
5232       /* put together the new matrix */
5233       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5234       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5235       /* Since these are PETSc arrays, change flags to free them as necessary. */
5236       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5237       c->free_a  = PETSC_TRUE;
5238       c->free_ij = PETSC_TRUE;
5239       c->nonew   = 0;
5240       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5241     } else if (scall == MAT_REUSE_MATRIX) {
5242       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5243       for (i=0; i<am; i++) {
5244         const PetscInt ncols_d = ai[i+1] - ai[i];
5245         const PetscInt ncols_o = bi[i+1] - bi[i];
5246         /* diagonal portion of A */
5247         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5248         /* off-diagonal portion of A */
5249         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5250       }
5251       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5252     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5253     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5254     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5255     if (glob) {
5256       PetscInt cst, *gidx;
5257 
5258       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5259       PetscCall(PetscMalloc1(dn+on,&gidx));
5260       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5261       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5262       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5263     }
5264   }
5265   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5266   PetscFunctionReturn(0);
5267 }
5268 
5269 /*@C
5270      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5271 
5272     Not Collective
5273 
5274    Input Parameters:
5275 +    A - the matrix
5276 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5277 -    row, col - index sets of rows and columns to extract (or NULL)
5278 
5279    Output Parameter:
5280 .    A_loc - the local sequential matrix generated
5281 
5282     Level: developer
5283 
5284 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5285 
5286 @*/
5287 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5288 {
5289   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5290   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5291   IS             isrowa,iscola;
5292   Mat            *aloc;
5293   PetscBool      match;
5294 
5295   PetscFunctionBegin;
5296   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5297   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5298   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5299   if (!row) {
5300     start = A->rmap->rstart; end = A->rmap->rend;
5301     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5302   } else {
5303     isrowa = *row;
5304   }
5305   if (!col) {
5306     start = A->cmap->rstart;
5307     cmap  = a->garray;
5308     nzA   = a->A->cmap->n;
5309     nzB   = a->B->cmap->n;
5310     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5311     ncols = 0;
5312     for (i=0; i<nzB; i++) {
5313       if (cmap[i] < start) idx[ncols++] = cmap[i];
5314       else break;
5315     }
5316     imark = i;
5317     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5318     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5320   } else {
5321     iscola = *col;
5322   }
5323   if (scall != MAT_INITIAL_MATRIX) {
5324     PetscCall(PetscMalloc1(1,&aloc));
5325     aloc[0] = *A_loc;
5326   }
5327   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5328   if (!col) { /* attach global id of condensed columns */
5329     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5330   }
5331   *A_loc = aloc[0];
5332   PetscCall(PetscFree(aloc));
5333   if (!row) {
5334     PetscCall(ISDestroy(&isrowa));
5335   }
5336   if (!col) {
5337     PetscCall(ISDestroy(&iscola));
5338   }
5339   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5340   PetscFunctionReturn(0);
5341 }
5342 
/*
 * Create a sequential AIJ matrix based on row indices: once a row index is matched, the
 * entire row (all of its columns) is extracted. The rows may be local or remote. The routine
 * is designed to be scalable in memory, so that nothing is allocated based on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* Per row: [0] counts diagonal-block nonzeros, [1] counts off-diagonal nonzeros */
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute prefix offsets so we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  /* Accumulate the total number of diag/off-diag entries to receive, and the
     widest row, which serves as the (loose) column count of the result */
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* Build per-entry SF graphs: one leaf per nonzero, addressed by the offsets
     received above; diag and off-diag entries interleave into P_oth's single CSR */
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (undone after the broadcast below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Map the off-diagonal j array back to local indices, restoring P's internal state */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5516 
/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of the local A.
 * This supports both MPIAIJ and MAIJ matrices.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; for dof > 1 (MAIJ) several
       consecutive garray entries collapse onto the same key garray[i]/dof */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted),
           so it maps to the most recently assigned slot */
        mapping[i] = count-1;
      }
    }
    /* map: off-diagonal column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-map keys come out unordered; sort to get the rows in ascending order */
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5594 
5595 /*@C
5596   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5597 
5598   Collective on Mat
5599 
5600   Input Parameters:
5601 + A - the first matrix in mpiaij format
5602 . B - the second matrix in mpiaij format
5603 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5604 
5605   Output Parameters:
5606 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5607 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5608 - B_seq - the sequential matrix generated
5609 
5610   Level: developer
5611 
5612 @*/
5613 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5614 {
5615   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5616   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5617   IS             isrowb,iscolb;
5618   Mat            *bseq=NULL;
5619 
5620   PetscFunctionBegin;
5621   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5622     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5623   }
5624   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5625 
5626   if (scall == MAT_INITIAL_MATRIX) {
5627     start = A->cmap->rstart;
5628     cmap  = a->garray;
5629     nzA   = a->A->cmap->n;
5630     nzB   = a->B->cmap->n;
5631     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5632     ncols = 0;
5633     for (i=0; i<nzB; i++) {  /* row < local row index */
5634       if (cmap[i] < start) idx[ncols++] = cmap[i];
5635       else break;
5636     }
5637     imark = i;
5638     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5639     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5640     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5641     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5642   } else {
5643     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5644     isrowb  = *rowb; iscolb = *colb;
5645     PetscCall(PetscMalloc1(1,&bseq));
5646     bseq[0] = *B_seq;
5647   }
5648   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5649   *B_seq = bseq[0];
5650   PetscCall(PetscFree(bseq));
5651   if (!rowb) {
5652     PetscCall(ISDestroy(&isrowb));
5653   } else {
5654     *rowb = isrowb;
5655   }
5656   if (!colb) {
5657     PetscCall(ISDestroy(&iscolb));
5658   } else {
5659     *colb = iscolb;
5660   }
5661   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5662   PetscFunctionReturn(0);
5663 }
5664 
5665 /*
5666     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5667     of the OFF-DIAGONAL portion of local A
5668 
5669     Collective on Mat
5670 
5671    Input Parameters:
5672 +    A,B - the matrices in mpiaij format
5673 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5674 
   Output Parameters:
5676 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5677 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5678 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5679 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5680 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5683 
5684     Level: developer
5685 
5686 */
5687 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5688 {
5689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5690   Mat_SeqAIJ             *b_oth;
5691   VecScatter             ctx;
5692   MPI_Comm               comm;
5693   const PetscMPIInt      *rprocs,*sprocs;
5694   const PetscInt         *srow,*rstarts,*sstarts;
5695   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5696   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5697   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5698   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5699   PetscMPIInt            size,tag,rank,nreqs;
5700 
5701   PetscFunctionBegin;
5702   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5703   PetscCallMPI(MPI_Comm_size(comm,&size));
5704 
5705   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5706     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5707   }
5708   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5709   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5710 
5711   if (size == 1) {
5712     startsj_s = NULL;
5713     bufa_ptr  = NULL;
5714     *B_oth    = NULL;
5715     PetscFunctionReturn(0);
5716   }
5717 
5718   ctx = a->Mvctx;
5719   tag = ((PetscObject)ctx)->tag;
5720 
5721   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5722   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5723   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5724   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5725   PetscCall(PetscMalloc1(nreqs,&reqs));
5726   rwaits = reqs;
5727   swaits = reqs + nrecvs;
5728 
5729   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5730   if (scall == MAT_INITIAL_MATRIX) {
5731     /* i-array */
5732     /*---------*/
5733     /*  post receives */
5734     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5735     for (i=0; i<nrecvs; i++) {
5736       rowlen = rvalues + rstarts[i]*rbs;
5737       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5738       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5739     }
5740 
5741     /* pack the outgoing message */
5742     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5743 
5744     sstartsj[0] = 0;
5745     rstartsj[0] = 0;
5746     len         = 0; /* total length of j or a array to be sent */
5747     if (nsends) {
5748       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5749       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5750     }
5751     for (i=0; i<nsends; i++) {
5752       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5753       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5754       for (j=0; j<nrows; j++) {
5755         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5756         for (l=0; l<sbs; l++) {
5757           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5758 
5759           rowlen[j*sbs+l] = ncols;
5760 
5761           len += ncols;
5762           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5763         }
5764         k++;
5765       }
5766       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5767 
5768       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5769     }
5770     /* recvs and sends of i-array are completed */
5771     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5772     PetscCall(PetscFree(svalues));
5773 
5774     /* allocate buffers for sending j and a arrays */
5775     PetscCall(PetscMalloc1(len+1,&bufj));
5776     PetscCall(PetscMalloc1(len+1,&bufa));
5777 
5778     /* create i-array of B_oth */
5779     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5780 
5781     b_othi[0] = 0;
5782     len       = 0; /* total length of j or a array to be received */
5783     k         = 0;
5784     for (i=0; i<nrecvs; i++) {
5785       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5786       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5787       for (j=0; j<nrows; j++) {
5788         b_othi[k+1] = b_othi[k] + rowlen[j];
5789         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5790         k++;
5791       }
5792       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5793     }
5794     PetscCall(PetscFree(rvalues));
5795 
5796     /* allocate space for j and a arrays of B_oth */
5797     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5798     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5799 
5800     /* j-array */
5801     /*---------*/
5802     /*  post receives of j-array */
5803     for (i=0; i<nrecvs; i++) {
5804       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5805       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5806     }
5807 
5808     /* pack the outgoing message j-array */
5809     if (nsends) k = sstarts[0];
5810     for (i=0; i<nsends; i++) {
5811       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5812       bufJ  = bufj+sstartsj[i];
5813       for (j=0; j<nrows; j++) {
5814         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5815         for (ll=0; ll<sbs; ll++) {
5816           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5817           for (l=0; l<ncols; l++) {
5818             *bufJ++ = cols[l];
5819           }
5820           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5821         }
5822       }
5823       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5824     }
5825 
5826     /* recvs and sends of j-array are completed */
5827     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5828   } else if (scall == MAT_REUSE_MATRIX) {
5829     sstartsj = *startsj_s;
5830     rstartsj = *startsj_r;
5831     bufa     = *bufa_ptr;
5832     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5833     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5834   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5835 
5836   /* a-array */
5837   /*---------*/
5838   /*  post receives of a-array */
5839   for (i=0; i<nrecvs; i++) {
5840     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5841     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5842   }
5843 
5844   /* pack the outgoing message a-array */
5845   if (nsends) k = sstarts[0];
5846   for (i=0; i<nsends; i++) {
5847     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5848     bufA  = bufa+sstartsj[i];
5849     for (j=0; j<nrows; j++) {
5850       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5851       for (ll=0; ll<sbs; ll++) {
5852         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5853         for (l=0; l<ncols; l++) {
5854           *bufA++ = vals[l];
5855         }
5856         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5857       }
5858     }
5859     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5860   }
5861   /* recvs and sends of a-array are completed */
5862   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5863   PetscCall(PetscFree(reqs));
5864 
5865   if (scall == MAT_INITIAL_MATRIX) {
5866     /* put together the new matrix */
5867     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5868 
5869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5870     /* Since these are PETSc arrays, change flags to free them as necessary. */
5871     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5872     b_oth->free_a  = PETSC_TRUE;
5873     b_oth->free_ij = PETSC_TRUE;
5874     b_oth->nonew   = 0;
5875 
5876     PetscCall(PetscFree(bufj));
5877     if (!startsj_s || !bufa_ptr) {
5878       PetscCall(PetscFree2(sstartsj,rstartsj));
5879       PetscCall(PetscFree(bufa_ptr));
5880     } else {
5881       *startsj_s = sstartsj;
5882       *startsj_r = rstartsj;
5883       *bufa_ptr  = bufa;
5884     }
5885   } else if (scall == MAT_REUSE_MATRIX) {
5886     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5887   }
5888 
5889   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5890   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5891   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5892   PetscFunctionReturn(0);
5893 }
5894 
5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5898 #if defined(PETSC_HAVE_MKL_SPARSE)
5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5900 #endif
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5903 #if defined(PETSC_HAVE_ELEMENTAL)
5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5905 #endif
5906 #if defined(PETSC_HAVE_SCALAPACK)
5907 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5908 #endif
5909 #if defined(PETSC_HAVE_HYPRE)
5910 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5911 #endif
5912 #if defined(PETSC_HAVE_CUDA)
5913 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5914 #endif
5915 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5917 #endif
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5919 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5920 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5921 
5922 /*
5923     Computes (B'*A')' since computing B*A directly is untenable
5924 
5925                n                       p                          p
5926         [             ]       [             ]         [                 ]
5927       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5928         [             ]       [             ]         [                 ]
5929 
5930 */
5931 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5932 {
5933   Mat            At,Bt,Ct;
5934 
5935   PetscFunctionBegin;
5936   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5937   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5938   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5939   PetscCall(MatDestroy(&At));
5940   PetscCall(MatDestroy(&Bt));
5941   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5942   PetscCall(MatDestroy(&Ct));
5943   PetscFunctionReturn(0);
5944 }
5945 
5946 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5947 {
5948   PetscBool      cisdense;
5949 
5950   PetscFunctionBegin;
5951   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5952   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5953   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5954   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5955   if (!cisdense) {
5956     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5957   }
5958   PetscCall(MatSetUp(C));
5959 
5960   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5961   PetscFunctionReturn(0);
5962 }
5963 
5964 /* ----------------------------------------------------------------*/
5965 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5966 {
5967   Mat_Product *product = C->product;
5968   Mat         A = product->A,B=product->B;
5969 
5970   PetscFunctionBegin;
5971   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5972     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5973 
5974   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5975   C->ops->productsymbolic = MatProductSymbolic_AB;
5976   PetscFunctionReturn(0);
5977 }
5978 
5979 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5980 {
5981   Mat_Product    *product = C->product;
5982 
5983   PetscFunctionBegin;
5984   if (product->type == MATPRODUCT_AB) {
5985     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5986   }
5987   PetscFunctionReturn(0);
5988 }
5989 
5990 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5991 
5992   Input Parameters:
5993 
5994     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5995     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5996 
5997     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5998 
5999     For Set1, j1[] contains column indices of the nonzeros.
6000     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6002     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6003 
6004     Similar for Set2.
6005 
6006     This routine merges the two sets of nonzeros row by row and removes repeats.
6007 
6008   Output Parameters: (memory is allocated by the caller)
6009 
6010     i[],j[]: the CSR of the merged matrix, which has m rows.
6011     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6012     imap2[]: similar to imap1[], but for Set2.
6013     Note we order nonzeros row-by-row and from left to right.
6014 */
/* Merge two sets of row-wise sorted nonzeros into one CSR (i[],j[]) without duplicates, and record
   for each unique nonzero of Set1/Set2 its position in the merged matrix (imap1[]/imap2[]).
   See the comment block above this function for the full parameter description. */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* r: local row index of mat; m: number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Running counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Two-pointer merge; both [b1,e1) of j1[] and [b2,e2) of j2[] are sorted within the row */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero, skipping its repeats */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero, skipping its repeats */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero present only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Nonzero present only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these two loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: merged row r ends at position t */
  }
  PetscFunctionReturn(0);
}
6068 
6069 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6070 
6071   Input Parameters:
6072     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6073     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6074       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6075 
6076       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6077       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6078 
6079   Output Parameters:
6080     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6081     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6082       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6083       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6084 
6085     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6086       Atot: number of entries belonging to the diagonal block.
6087       Annz: number of unique nonzeros belonging to the diagonal block.
6088       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6089         repeats (i.e., same 'i,j' pair).
6090       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6091         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6092 
      (Note: Atot counts entries including repeats of the same (i,j) position, whereas Annz counts
       distinct positions only, so Annz <= Atot.)
6095 
6096     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6097 
6098     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6099 */
/* Split the n row-sorted entries into diagonal-block and off-diagonal-block subsets; see the
   comment block above for the full parameter description. Two passes over the data:
   pass 1 sorts each row's column indices (diag columns temporarily shifted negative so they sort
   ahead of offdiag), fills rowBegin/rowMid/rowEnd and counts Atot/Btot/Annz/Bnnz;
   pass 2 allocates and fills Aperm/Bperm/Ajmap/Bjmap. */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart; /* Number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    /* Sort this row's (shifted) column indices, carrying perm[] along */
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      /* Note: j[p] in the loop condition is the next, still-shifted index, consistent with the shifted col */
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot,&Aperm));
  PetscCall(PetscMalloc1(Btot,&Bperm));
  PetscCall(PetscMalloc1(Annz+1,&Ajmap));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Counters are reused as running offsets in pass 2 */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* p - q = number of repeats of this unique diag nonzero */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6199 
6200 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6201 
6202   Input Parameters:
6203     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6204     nnz:  number of unique nonzeros in the merged matrix
6205     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6207 
6208   Output Parameter: (memory is allocated by the caller)
6209     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6210 
6211   Example:
6212     nnz1 = 4
6213     nnz  = 6
6214     imap = [1,3,4,5]
6215     jmap = [0,3,5,6,7]
6216    then,
6217     jmap_new = [0,0,3,3,5,6,7]
6218 */
6219 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6220 {
6221   PetscCount k,p;
6222 
6223   PetscFunctionBegin;
6224   jmap_new[0] = 0;
6225   p = nnz; /* p loops over jmap_new[] backwards */
6226   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6227     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6228   }
6229   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6230   PetscFunctionReturn(0);
6231 }
6232 
6233 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6234 {
6235   MPI_Comm                  comm;
6236   PetscMPIInt               rank,size;
6237   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6238   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6239   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6240 
6241   PetscFunctionBegin;
6242   PetscCall(PetscFree(mpiaij->garray));
6243   PetscCall(VecDestroy(&mpiaij->lvec));
6244 #if defined(PETSC_USE_CTABLE)
6245   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6246 #else
6247   PetscCall(PetscFree(mpiaij->colmap));
6248 #endif
6249   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6250   mat->assembled = PETSC_FALSE;
6251   mat->was_assembled = PETSC_FALSE;
6252   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6253 
6254   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6255   PetscCallMPI(MPI_Comm_size(comm,&size));
6256   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6257   PetscCall(PetscLayoutSetUp(mat->rmap));
6258   PetscCall(PetscLayoutSetUp(mat->cmap));
6259   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6260   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6261   PetscCall(MatGetLocalSize(mat,&m,&n));
6262   PetscCall(MatGetSize(mat,&M,&N));
6263 
6264   /* ---------------------------------------------------------------------------*/
6265   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6266   /* entries come first, then local rows, then remote rows.                     */
6267   /* ---------------------------------------------------------------------------*/
6268   PetscCount n1 = coo_n,*perm1;
6269   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6270   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6271   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6272   PetscCall(PetscArraycpy(j1,coo_j,n1));
6273   for (k=0; k<n1; k++) perm1[k] = k;
6274 
6275   /* Manipulate indices so that entries with negative row or col indices will have smallest
6276      row indices, local entries will have greater but negative row indices, and remote entries
6277      will have positive row indices.
6278   */
6279   for (k=0; k<n1; k++) {
6280     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6281     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6282     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6283     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6284   }
6285 
6286   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6287   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6288   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6289   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6290   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6291 
6292   /* ---------------------------------------------------------------------------*/
6293   /*           Split local rows into diag/offdiag portions                      */
6294   /* ---------------------------------------------------------------------------*/
6295   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6296   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6297   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6298 
6299   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6300   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6301   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6302 
6303   /* ---------------------------------------------------------------------------*/
6304   /*           Send remote rows to their owner                                  */
6305   /* ---------------------------------------------------------------------------*/
6306   /* Find which rows should be sent to which remote ranks*/
6307   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6308   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6309   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6310   const PetscInt *ranges;
6311   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6312 
6313   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6314   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6315   for (k=rem; k<n1;) {
6316     PetscMPIInt  owner;
6317     PetscInt     firstRow,lastRow;
6318 
6319     /* Locate a row range */
6320     firstRow = i1[k]; /* first row of this owner */
6321     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6322     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6323 
6324     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6325     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6326 
6327     /* All entries in [k,p) belong to this remote owner */
6328     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6329       PetscMPIInt *sendto2;
6330       PetscInt    *nentries2;
6331       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6332 
6333       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6334       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6335       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6336       PetscCall(PetscFree2(sendto,nentries2));
6337       sendto      = sendto2;
6338       nentries    = nentries2;
6339       maxNsend    = maxNsend2;
6340     }
6341     sendto[nsend]   = owner;
6342     nentries[nsend] = p - k;
6343     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6344     nsend++;
6345     k = p;
6346   }
6347 
6348   /* Build 1st SF to know offsets on remote to send data */
6349   PetscSF     sf1;
6350   PetscInt    nroots = 1,nroots2 = 0;
6351   PetscInt    nleaves = nsend,nleaves2 = 0;
6352   PetscInt    *offsets;
6353   PetscSFNode *iremote;
6354 
6355   PetscCall(PetscSFCreate(comm,&sf1));
6356   PetscCall(PetscMalloc1(nsend,&iremote));
6357   PetscCall(PetscMalloc1(nsend,&offsets));
6358   for (k=0; k<nsend; k++) {
6359     iremote[k].rank  = sendto[k];
6360     iremote[k].index = 0;
6361     nleaves2        += nentries[k];
6362     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6363   }
6364   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6365   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6366   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6367   PetscCall(PetscSFDestroy(&sf1));
6368   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6369 
6370   /* Build 2nd SF to send remote COOs to their owner */
6371   PetscSF sf2;
6372   nroots  = nroots2;
6373   nleaves = nleaves2;
6374   PetscCall(PetscSFCreate(comm,&sf2));
6375   PetscCall(PetscSFSetFromOptions(sf2));
6376   PetscCall(PetscMalloc1(nleaves,&iremote));
6377   p       = 0;
6378   for (k=0; k<nsend; k++) {
6379     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6380     for (q=0; q<nentries[k]; q++,p++) {
6381       iremote[p].rank  = sendto[k];
6382       iremote[p].index = offsets[k] + q;
6383     }
6384   }
6385   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6386 
6387   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6388   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6389 
6390   /* Send the remote COOs to their owner */
6391   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6392   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6393   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6394   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6395   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6396   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6397   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6398 
6399   PetscCall(PetscFree(offsets));
6400   PetscCall(PetscFree2(sendto,nentries));
6401 
6402   /* ---------------------------------------------------------------*/
6403   /* Sort received COOs by row along with the permutation array     */
6404   /* ---------------------------------------------------------------*/
6405   for (k=0; k<n2; k++) perm2[k] = k;
6406   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6407 
6408   /* ---------------------------------------------------------------*/
6409   /* Split received COOs into diag/offdiag portions                 */
6410   /* ---------------------------------------------------------------*/
6411   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6412   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6413   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6414 
6415   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6416   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6417 
6418   /* --------------------------------------------------------------------------*/
6419   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6420   /* --------------------------------------------------------------------------*/
6421   PetscInt   *Ai,*Bi;
6422   PetscInt   *Aj,*Bj;
6423 
6424   PetscCall(PetscMalloc1(m+1,&Ai));
6425   PetscCall(PetscMalloc1(m+1,&Bi));
6426   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6427   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6428 
6429   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6430   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6431   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6432   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6433   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6434 
6435   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6436   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6437 
6438   /* --------------------------------------------------------------------------*/
6439   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6440   /* expect nonzeros in A/B most likely have local contributing entries        */
6441   /* --------------------------------------------------------------------------*/
6442   PetscInt Annz = Ai[m];
6443   PetscInt Bnnz = Bi[m];
6444   PetscCount *Ajmap1_new,*Bjmap1_new;
6445 
6446   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6447   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6448 
6449   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6450   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6451 
6452   PetscCall(PetscFree(Aimap1));
6453   PetscCall(PetscFree(Ajmap1));
6454   PetscCall(PetscFree(Bimap1));
6455   PetscCall(PetscFree(Bjmap1));
6456   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6457   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6458   PetscCall(PetscFree3(i1,j1,perm1));
6459   PetscCall(PetscFree3(i2,j2,perm2));
6460 
6461   Ajmap1 = Ajmap1_new;
6462   Bjmap1 = Bjmap1_new;
6463 
6464   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6465   if (Annz < Annz1 + Annz2) {
6466     PetscInt *Aj_new;
6467     PetscCall(PetscMalloc1(Annz,&Aj_new));
6468     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6469     PetscCall(PetscFree(Aj));
6470     Aj   = Aj_new;
6471   }
6472 
6473   if (Bnnz < Bnnz1 + Bnnz2) {
6474     PetscInt *Bj_new;
6475     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6476     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6477     PetscCall(PetscFree(Bj));
6478     Bj   = Bj_new;
6479   }
6480 
6481   /* --------------------------------------------------------------------------------*/
6482   /* Create new submatrices for on-process and off-process coupling                  */
6483   /* --------------------------------------------------------------------------------*/
6484   PetscScalar   *Aa,*Ba;
6485   MatType       rtype;
6486   Mat_SeqAIJ    *a,*b;
6487   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6488   PetscCall(PetscCalloc1(Bnnz,&Ba));
6489   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6490   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6491   PetscCall(MatDestroy(&mpiaij->A));
6492   PetscCall(MatDestroy(&mpiaij->B));
6493   PetscCall(MatGetRootType_Private(mat,&rtype));
6494   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6495   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6496   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6497 
6498   a = (Mat_SeqAIJ*)mpiaij->A->data;
6499   b = (Mat_SeqAIJ*)mpiaij->B->data;
6500   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6501   a->free_a       = b->free_a       = PETSC_TRUE;
6502   a->free_ij      = b->free_ij      = PETSC_TRUE;
6503 
6504   /* conversion must happen AFTER multiply setup */
6505   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6506   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6507   PetscCall(VecDestroy(&mpiaij->lvec));
6508   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6509   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6510 
6511   mpiaij->coo_n   = coo_n;
6512   mpiaij->coo_sf  = sf2;
6513   mpiaij->sendlen = nleaves;
6514   mpiaij->recvlen = nroots;
6515 
6516   mpiaij->Annz    = Annz;
6517   mpiaij->Bnnz    = Bnnz;
6518 
6519   mpiaij->Annz2   = Annz2;
6520   mpiaij->Bnnz2   = Bnnz2;
6521 
6522   mpiaij->Atot1   = Atot1;
6523   mpiaij->Atot2   = Atot2;
6524   mpiaij->Btot1   = Btot1;
6525   mpiaij->Btot2   = Btot2;
6526 
6527   mpiaij->Ajmap1  = Ajmap1;
6528   mpiaij->Aperm1  = Aperm1;
6529 
6530   mpiaij->Bjmap1  = Bjmap1;
6531   mpiaij->Bperm1  = Bperm1;
6532 
6533   mpiaij->Aimap2  = Aimap2;
6534   mpiaij->Ajmap2  = Ajmap2;
6535   mpiaij->Aperm2  = Aperm2;
6536 
6537   mpiaij->Bimap2  = Bimap2;
6538   mpiaij->Bjmap2  = Bjmap2;
6539   mpiaij->Bperm2  = Bperm2;
6540 
6541   mpiaij->Cperm1  = Cperm1;
6542 
6543   /* Allocate in preallocation. If not used, it has zero cost on host */
6544   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6545   PetscFunctionReturn(0);
6546 }
6547 
/* Insert/add the numerical values v[] into a matrix whose sparsity pattern was
   established by MatSetPreallocationCOO_MPIAIJ().  v[] is ordered as the
   (coo_i,coo_j) pairs given at preallocation time.  Entries owned by other
   ranks are shipped to their owners through the mpiaij->coo_sf star forest,
   and that communication is overlapped with the summation of locally-owned
   entries.  The maps used below were built at preallocation time:
     - Cperm1[]: indices into v[] of entries destined for remote ranks
     - Ajmap1/Bjmap1 + Aperm1/Bperm1: for each nonzero of the diag (A) /
       off-diag (B) block, the range of contributing local entries of v[]
     - Ajmap2/Bjmap2 + Aimap2/Bimap2 + Aperm2/Bperm2: the analogous maps for
       entries received from remote ranks into recvbuf[]
   imode is INSERT_VALUES (overwrite) or ADD_VALUES (accumulate). */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i=0; i<Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
  }
  /* Finish the reduction; recvbuf[] now holds the contributions from remote ranks */
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] is the target nonzero */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6594 
6595 /* ----------------------------------------------------------------*/
6596 
6597 /*MC
6598    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6599 
6600    Options Database Keys:
6601 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6602 
6603    Level: beginner
6604 
6605    Notes:
6606     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6607     in this case the values associated with the rows and columns one passes in are set to zero
6608     in the matrix
6609 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6612 
6613 .seealso: `MatCreateAIJ()`
6614 M*/
6615 
/* Constructor for the MATMPIAIJ matrix type.

   Allocates the Mat_MPIAIJ context, installs the function table from
   MatOps_Values, creates the stash used to buffer off-process entries set
   with MatSetValues(), zeroes the fields filled in later by preallocation
   and assembly, and registers the type-specific methods and conversion
   routines looked up by name through PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built on demand */
  b->garray      = NULL; /* global column indices of the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* register type-specific methods; each string key is queried via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6695 
6696 /*@C
6697      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6698          and "off-diagonal" part of the matrix in CSR format.
6699 
6700    Collective
6701 
6702    Input Parameters:
6703 +  comm - MPI communicator
6704 .  m - number of local rows (Cannot be PETSC_DECIDE)
6705 .  n - This value should be the same as the local size used in creating the
6706        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6707        calculated if N is given) For square matrices n is almost always m.
6708 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6709 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6710 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6711 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6712 .   a - matrix values
6713 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6714 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6715 -   oa - matrix values
6716 
6717    Output Parameter:
6718 .   mat - the matrix
6719 
6720    Level: advanced
6721 
6722    Notes:
6723        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6724        must free the arrays once the matrix has been destroyed and not before.
6725 
6726        The i and j indices are 0 based
6727 
6728        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6729 
6730        This sets local rows and cannot be used to set off-processor values.
6731 
6732        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6733        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6734        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6735        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6736        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6737        communication if it is known that only local entries will be set.
6738 
6739 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6740           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6741 @*/
6742 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6743 {
6744   Mat_MPIAIJ     *maij;
6745 
6746   PetscFunctionBegin;
6747   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6748   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6749   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6750   PetscCall(MatCreate(comm,mat));
6751   PetscCall(MatSetSizes(*mat,m,n,M,N));
6752   PetscCall(MatSetType(*mat,MATMPIAIJ));
6753   maij = (Mat_MPIAIJ*) (*mat)->data;
6754 
6755   (*mat)->preallocated = PETSC_TRUE;
6756 
6757   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6758   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6759 
6760   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6761   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6762 
6763   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6764   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6765   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6766   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6767   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6768   PetscFunctionReturn(0);
6769 }
6770 
/* Context for backend MPIAIJ matrix-matrix products (AB, AtB, PtAP).
   The parallel product is computed as a series of local (sequential)
   intermediate products mp[]; their values are then gathered into coo_v
   and inserted into the result with MatSetValuesCOO(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/recv offsets reused on MAT_REUSE_MATRIX calls */
  PetscScalar *bufa;                 /* communication buffer for P_oth values */
  Mat         P_oth;                 /* rows of P needed from other ranks */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the SF-allocated coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;    /* for AB: merge product->B's diag/off-diag blocks before multiplying ? */
  PetscBool P_oth_bind; /* bind P_oth to the CPU ? */
} MatMatMPIAIJBACKEND;
6801 
6802 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6803 {
6804   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6805   PetscInt            i;
6806 
6807   PetscFunctionBegin;
6808   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6809   PetscCall(PetscFree(mmdata->bufa));
6810   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6811   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6812   PetscCall(MatDestroy(&mmdata->P_oth));
6813   PetscCall(MatDestroy(&mmdata->Bloc));
6814   PetscCall(PetscSFDestroy(&mmdata->sf));
6815   for (i = 0; i < mmdata->cp; i++) {
6816     PetscCall(MatDestroy(&mmdata->mp[i]));
6817   }
6818   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6819   PetscCall(PetscFree(mmdata->own[0]));
6820   PetscCall(PetscFree(mmdata->own));
6821   PetscCall(PetscFree(mmdata->off[0]));
6822   PetscCall(PetscFree(mmdata->off));
6823   PetscCall(PetscFree(mmdata));
6824   PetscFunctionReturn(0);
6825 }
6826 
6827 /* Copy selected n entries with indices in idx[] of A to v[].
6828    If idx is NULL, copy the whole data array of A to v[]
6829  */
6830 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6831 {
6832   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6833 
6834   PetscFunctionBegin;
6835   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6836   if (f) {
6837     PetscCall((*f)(A,n,idx,v));
6838   } else {
6839     const PetscScalar *vv;
6840 
6841     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6842     if (n && idx) {
6843       PetscScalar    *w = v;
6844       const PetscInt *oi = idx;
6845       PetscInt       j;
6846 
6847       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6848     } else {
6849       PetscCall(PetscArraycpy(v,vv,n));
6850     }
6851     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6852   }
6853   PetscFunctionReturn(0);
6854 }
6855 
/* Numeric phase of the backend MPIAIJ matrix product.

   Refreshes the temporary matrices (P_oth, Bloc) unless the values computed
   during the symbolic phase can be reused, runs the numeric op of every
   intermediate product mp[], copies their values into the COO buffers
   (coo_v for on-process entries, coo_w for entries owned elsewhere), gathers
   the off-process entries via the sf, and finally inserts everything into C
   with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase values are only valid for the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* #entries of mp[i] owned by other ranks */

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* #entries of mp[i] owned locally */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries: copy mp[i]'s whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6904 
6905 /* Support for Pt * A, A * P, or Pt * A * P */
6906 #define MAX_NUMBER_INTERMEDIATE 4
6907 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6908 {
6909   Mat_Product            *product = C->product;
6910   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6911   Mat_MPIAIJ             *a,*p;
6912   MatMatMPIAIJBACKEND    *mmdata;
6913   ISLocalToGlobalMapping P_oth_l2g = NULL;
6914   IS                     glob = NULL;
6915   const char             *prefix;
6916   char                   pprefix[256];
6917   const PetscInt         *globidx,*P_oth_idx;
6918   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6919   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6920   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6921                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6922                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6923   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6924 
6925   MatProductType         ptype;
6926   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6927   PetscMPIInt            size;
6928 
6929   PetscFunctionBegin;
6930   MatCheckProduct(C,1);
6931   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6932   ptype = product->type;
6933   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6934     ptype = MATPRODUCT_AB;
6935     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6936   }
6937   switch (ptype) {
6938   case MATPRODUCT_AB:
6939     A = product->A;
6940     P = product->B;
6941     m = A->rmap->n;
6942     n = P->cmap->n;
6943     M = A->rmap->N;
6944     N = P->cmap->N;
6945     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6946     break;
6947   case MATPRODUCT_AtB:
6948     P = product->A;
6949     A = product->B;
6950     m = P->cmap->n;
6951     n = A->cmap->n;
6952     M = P->cmap->N;
6953     N = A->cmap->N;
6954     hasoffproc = PETSC_TRUE;
6955     break;
6956   case MATPRODUCT_PtAP:
6957     A = product->A;
6958     P = product->B;
6959     m = P->cmap->n;
6960     n = P->cmap->n;
6961     M = P->cmap->N;
6962     N = P->cmap->N;
6963     hasoffproc = PETSC_TRUE;
6964     break;
6965   default:
6966     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6967   }
6968   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6969   if (size == 1) hasoffproc = PETSC_FALSE;
6970 
6971   /* defaults */
6972   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6973     mp[i]    = NULL;
6974     mptmp[i] = PETSC_FALSE;
6975     rmapt[i] = -1;
6976     cmapt[i] = -1;
6977     rmapa[i] = NULL;
6978     cmapa[i] = NULL;
6979   }
6980 
6981   /* customization */
6982   PetscCall(PetscNew(&mmdata));
6983   mmdata->reusesym = product->api_user;
6984   if (ptype == MATPRODUCT_AB) {
6985     if (product->api_user) {
6986       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6987       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6988       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6989       PetscOptionsEnd();
6990     } else {
6991       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6992       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6993       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6994       PetscOptionsEnd();
6995     }
6996   } else if (ptype == MATPRODUCT_PtAP) {
6997     if (product->api_user) {
6998       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
6999       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7000       PetscOptionsEnd();
7001     } else {
7002       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7003       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7004       PetscOptionsEnd();
7005     }
7006   }
7007   a = (Mat_MPIAIJ*)A->data;
7008   p = (Mat_MPIAIJ*)P->data;
7009   PetscCall(MatSetSizes(C,m,n,M,N));
7010   PetscCall(PetscLayoutSetUp(C->rmap));
7011   PetscCall(PetscLayoutSetUp(C->cmap));
7012   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7013   PetscCall(MatGetOptionsPrefix(C,&prefix));
7014 
7015   cp   = 0;
7016   switch (ptype) {
7017   case MATPRODUCT_AB: /* A * P */
7018     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7019 
7020     /* A_diag * P_local (merged or not) */
7021     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7022       /* P is product->B */
7023       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7024       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7025       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7026       PetscCall(MatProductSetFill(mp[cp],product->fill));
7027       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7028       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7029       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7030       mp[cp]->product->api_user = product->api_user;
7031       PetscCall(MatProductSetFromOptions(mp[cp]));
7032       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7033       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7034       PetscCall(ISGetIndices(glob,&globidx));
7035       rmapt[cp] = 1;
7036       cmapt[cp] = 2;
7037       cmapa[cp] = globidx;
7038       mptmp[cp] = PETSC_FALSE;
7039       cp++;
7040     } else { /* A_diag * P_diag and A_diag * P_off */
7041       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7042       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7043       PetscCall(MatProductSetFill(mp[cp],product->fill));
7044       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7045       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7046       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7047       mp[cp]->product->api_user = product->api_user;
7048       PetscCall(MatProductSetFromOptions(mp[cp]));
7049       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7050       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7051       rmapt[cp] = 1;
7052       cmapt[cp] = 1;
7053       mptmp[cp] = PETSC_FALSE;
7054       cp++;
7055       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7056       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7057       PetscCall(MatProductSetFill(mp[cp],product->fill));
7058       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7059       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7060       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7061       mp[cp]->product->api_user = product->api_user;
7062       PetscCall(MatProductSetFromOptions(mp[cp]));
7063       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7064       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7065       rmapt[cp] = 1;
7066       cmapt[cp] = 2;
7067       cmapa[cp] = p->garray;
7068       mptmp[cp] = PETSC_FALSE;
7069       cp++;
7070     }
7071 
7072     /* A_off * P_other */
7073     if (mmdata->P_oth) {
7074       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7075       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7076       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7077       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7078       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7079       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7080       PetscCall(MatProductSetFill(mp[cp],product->fill));
7081       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7082       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7083       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7084       mp[cp]->product->api_user = product->api_user;
7085       PetscCall(MatProductSetFromOptions(mp[cp]));
7086       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7087       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7088       rmapt[cp] = 1;
7089       cmapt[cp] = 2;
7090       cmapa[cp] = P_oth_idx;
7091       mptmp[cp] = PETSC_FALSE;
7092       cp++;
7093     }
7094     break;
7095 
7096   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7097     /* A is product->B */
7098     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7099     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7100       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7101       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7102       PetscCall(MatProductSetFill(mp[cp],product->fill));
7103       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7104       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7105       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7106       mp[cp]->product->api_user = product->api_user;
7107       PetscCall(MatProductSetFromOptions(mp[cp]));
7108       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7109       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7110       PetscCall(ISGetIndices(glob,&globidx));
7111       rmapt[cp] = 2;
7112       rmapa[cp] = globidx;
7113       cmapt[cp] = 2;
7114       cmapa[cp] = globidx;
7115       mptmp[cp] = PETSC_FALSE;
7116       cp++;
7117     } else {
7118       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7119       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7120       PetscCall(MatProductSetFill(mp[cp],product->fill));
7121       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7122       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7123       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7124       mp[cp]->product->api_user = product->api_user;
7125       PetscCall(MatProductSetFromOptions(mp[cp]));
7126       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7127       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7128       PetscCall(ISGetIndices(glob,&globidx));
7129       rmapt[cp] = 1;
7130       cmapt[cp] = 2;
7131       cmapa[cp] = globidx;
7132       mptmp[cp] = PETSC_FALSE;
7133       cp++;
7134       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7135       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7136       PetscCall(MatProductSetFill(mp[cp],product->fill));
7137       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7138       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7139       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7140       mp[cp]->product->api_user = product->api_user;
7141       PetscCall(MatProductSetFromOptions(mp[cp]));
7142       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7143       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7144       rmapt[cp] = 2;
7145       rmapa[cp] = p->garray;
7146       cmapt[cp] = 2;
7147       cmapa[cp] = globidx;
7148       mptmp[cp] = PETSC_FALSE;
7149       cp++;
7150     }
7151     break;
7152   case MATPRODUCT_PtAP:
7153     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7154     /* P is product->B */
7155     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7156     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7157     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7158     PetscCall(MatProductSetFill(mp[cp],product->fill));
7159     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7160     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7161     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7162     mp[cp]->product->api_user = product->api_user;
7163     PetscCall(MatProductSetFromOptions(mp[cp]));
7164     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7165     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7166     PetscCall(ISGetIndices(glob,&globidx));
7167     rmapt[cp] = 2;
7168     rmapa[cp] = globidx;
7169     cmapt[cp] = 2;
7170     cmapa[cp] = globidx;
7171     mptmp[cp] = PETSC_FALSE;
7172     cp++;
7173     if (mmdata->P_oth) {
7174       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7175       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7176       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7177       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7178       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7179       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7180       PetscCall(MatProductSetFill(mp[cp],product->fill));
7181       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7182       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7183       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7184       mp[cp]->product->api_user = product->api_user;
7185       PetscCall(MatProductSetFromOptions(mp[cp]));
7186       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7187       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7188       mptmp[cp] = PETSC_TRUE;
7189       cp++;
7190       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7191       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7192       PetscCall(MatProductSetFill(mp[cp],product->fill));
7193       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7194       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7195       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7196       mp[cp]->product->api_user = product->api_user;
7197       PetscCall(MatProductSetFromOptions(mp[cp]));
7198       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7199       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7200       rmapt[cp] = 2;
7201       rmapa[cp] = globidx;
7202       cmapt[cp] = 2;
7203       cmapa[cp] = P_oth_idx;
7204       mptmp[cp] = PETSC_FALSE;
7205       cp++;
7206     }
7207     break;
7208   default:
7209     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7210   }
7211   /* sanity check */
7212   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7213 
7214   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7215   for (i = 0; i < cp; i++) {
7216     mmdata->mp[i]    = mp[i];
7217     mmdata->mptmp[i] = mptmp[i];
7218   }
7219   mmdata->cp = cp;
7220   C->product->data       = mmdata;
7221   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7222   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7223 
7224   /* memory type */
7225   mmdata->mtype = PETSC_MEMTYPE_HOST;
7226   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7227   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7228   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7229   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7230 
7231   /* prepare coo coordinates for values insertion */
7232 
7233   /* count total nonzeros of those intermediate seqaij Mats
7234     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7235     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7236     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7237   */
7238   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7239     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7240     if (mptmp[cp]) continue;
7241     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7242       const PetscInt *rmap = rmapa[cp];
7243       const PetscInt mr = mp[cp]->rmap->n;
7244       const PetscInt rs = C->rmap->rstart;
7245       const PetscInt re = C->rmap->rend;
7246       const PetscInt *ii  = mm->i;
7247       for (i = 0; i < mr; i++) {
7248         const PetscInt gr = rmap[i];
7249         const PetscInt nz = ii[i+1] - ii[i];
7250         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7251         else ncoo_oown += nz; /* this row is local */
7252       }
7253     } else ncoo_d += mm->nz;
7254   }
7255 
7256   /*
7257     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7258 
7259     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7260 
7261     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7262 
7263     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7264     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7265     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7266 
7267     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7268     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7269   */
7270   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7271   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7272 
7273   /* gather (i,j) of nonzeros inserted by remote procs */
7274   if (hasoffproc) {
7275     PetscSF  msf;
7276     PetscInt ncoo2,*coo_i2,*coo_j2;
7277 
7278     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7279     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7280     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7281 
7282     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7283       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7284       PetscInt   *idxoff = mmdata->off[cp];
7285       PetscInt   *idxown = mmdata->own[cp];
7286       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7287         const PetscInt *rmap = rmapa[cp];
7288         const PetscInt *cmap = cmapa[cp];
7289         const PetscInt *ii  = mm->i;
7290         PetscInt       *coi = coo_i + ncoo_o;
7291         PetscInt       *coj = coo_j + ncoo_o;
7292         const PetscInt mr = mp[cp]->rmap->n;
7293         const PetscInt rs = C->rmap->rstart;
7294         const PetscInt re = C->rmap->rend;
7295         const PetscInt cs = C->cmap->rstart;
7296         for (i = 0; i < mr; i++) {
7297           const PetscInt *jj = mm->j + ii[i];
7298           const PetscInt gr  = rmap[i];
7299           const PetscInt nz  = ii[i+1] - ii[i];
7300           if (gr < rs || gr >= re) { /* this is an offproc row */
7301             for (j = ii[i]; j < ii[i+1]; j++) {
7302               *coi++ = gr;
7303               *idxoff++ = j;
7304             }
7305             if (!cmapt[cp]) { /* already global */
7306               for (j = 0; j < nz; j++) *coj++ = jj[j];
7307             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7308               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7309             } else { /* offdiag */
7310               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7311             }
7312             ncoo_o += nz;
7313           } else { /* this is a local row */
7314             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7315           }
7316         }
7317       }
7318       mmdata->off[cp + 1] = idxoff;
7319       mmdata->own[cp + 1] = idxown;
7320     }
7321 
7322     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7323     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7324     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7325     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7326     ncoo = ncoo_d + ncoo_oown + ncoo2;
7327     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7328     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7329     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7330     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7331     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7332     PetscCall(PetscFree2(coo_i,coo_j));
7333     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7334     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7335     coo_i = coo_i2;
7336     coo_j = coo_j2;
7337   } else { /* no offproc values insertion */
7338     ncoo = ncoo_d;
7339     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7340 
7341     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7342     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7343     PetscCall(PetscSFSetUp(mmdata->sf));
7344   }
7345   mmdata->hasoffproc = hasoffproc;
7346 
7347   /* gather (i,j) of nonzeros inserted locally */
7348   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7349     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7350     PetscInt       *coi = coo_i + ncoo_d;
7351     PetscInt       *coj = coo_j + ncoo_d;
7352     const PetscInt *jj  = mm->j;
7353     const PetscInt *ii  = mm->i;
7354     const PetscInt *cmap = cmapa[cp];
7355     const PetscInt *rmap = rmapa[cp];
7356     const PetscInt mr = mp[cp]->rmap->n;
7357     const PetscInt rs = C->rmap->rstart;
7358     const PetscInt re = C->rmap->rend;
7359     const PetscInt cs = C->cmap->rstart;
7360 
7361     if (mptmp[cp]) continue;
7362     if (rmapt[cp] == 1) { /* consecutive rows */
7363       /* fill coo_i */
7364       for (i = 0; i < mr; i++) {
7365         const PetscInt gr = i + rs;
7366         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7367       }
7368       /* fill coo_j */
7369       if (!cmapt[cp]) { /* type-0, already global */
7370         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7371       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7372         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7373       } else { /* type-2, local to global for sparse columns */
7374         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7375       }
7376       ncoo_d += mm->nz;
7377     } else if (rmapt[cp] == 2) { /* sparse rows */
7378       for (i = 0; i < mr; i++) {
7379         const PetscInt *jj = mm->j + ii[i];
7380         const PetscInt gr  = rmap[i];
7381         const PetscInt nz  = ii[i+1] - ii[i];
7382         if (gr >= rs && gr < re) { /* local rows */
7383           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7384           if (!cmapt[cp]) { /* type-0, already global */
7385             for (j = 0; j < nz; j++) *coj++ = jj[j];
7386           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7387             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7388           } else { /* type-2, local to global for sparse columns */
7389             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7390           }
7391           ncoo_d += nz;
7392         }
7393       }
7394     }
7395   }
7396   if (glob) {
7397     PetscCall(ISRestoreIndices(glob,&globidx));
7398   }
7399   PetscCall(ISDestroy(&glob));
7400   if (P_oth_l2g) {
7401     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7402   }
7403   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7404   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7405   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7406 
7407   /* preallocate with COO data */
7408   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7409   PetscCall(PetscFree2(coo_i,coo_j));
7410   PetscFunctionReturn(0);
7411 }
7412 
/* Decide whether the COO-based "backend" product implementation handles this AB, AtB
   or PtAP product, consulting the options database for an explicit CPU-fallback
   request; otherwise defer to the standard MPIAIJ product selection. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool    match   = PETSC_FALSE; /* eligible only when A and B share the same type and are not bound to the CPU */
  PetscBool    usecpu  = PETSC_FALSE;
#else
  PetscBool    match   = PETSC_TRUE;  /* no device support configured: the backend path is always eligible */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* A and B must have identical types (e.g. both MPIAIJCUSPARSE) and be usable on device */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    /* each product type has its own fallback option; product->api_user selects between the
       MatMatMult()-style and MatProductCreate()-style option names */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* a CPU request overrides the type match */
  }
#endif
  if (match) {
    /* install the backend symbolic phase for the supported product types only */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7485 
7486 /*
7487    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7488 
7489    n - the number of block indices in cc[]
7490    cc - the block indices (must be large enough to contain the indices)
7491 */
7492 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7493 {
7494   PetscInt       cnt = -1,nidx,j;
7495   const PetscInt *idx;
7496 
7497   PetscFunctionBegin;
7498   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7499   if (nidx) {
7500     cnt = 0;
7501     cc[cnt] = idx[0]/bs;
7502     for (j=1; j<nidx; j++) {
7503       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7504     }
7505   }
7506   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7507   *n = cnt+1;
7508   PetscFunctionReturn(0);
7509 }
7510 
7511 /*
7512     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7513 
7514     ncollapsed - the number of block indices
7515     collapsed - the block indices (must be large enough to contain the indices)
7516 */
7517 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7518 {
7519   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7520 
7521   PetscFunctionBegin;
7522   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7523   for (i=start+1; i<start+bs; i++) {
7524     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7525     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7526     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7527   }
7528   *ncollapsed = nprev;
7529   if (collapsed) *collapsed  = cprev;
7530   PetscFunctionReturn(0);
7531 }
7532 
7533 /* -------------------------------------------------------------------------- */
7534 /*
7535  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7536 
 Input Parameters:
 + Amat - matrix
 . symmetrize - make the result symmetric
 - scale - scale with diagonal
7541 
7542  Output Parameter:
7543  . a_Gmat - output scalar graph >= 0
7544 
7545  */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
{
  PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm       comm;
  Mat            Gmat;
  PetscBool      ismpiaij,isseqaij;
  Mat            a, b, c; /* a = diagonal block, b = off-diagonal block (NULL for seq), c = loop cursor over {a,b} */
  MatType        jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat,&jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* Fast path: requires the MPI off-diagonal map (garray) when parallel, and every bs x bs
       block fully dense with identical column pattern across the block's rows; any violation
       bails out to the general old_bs path below. */
    if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      /* AA/AJ are fixed stack buffers: one entry per block column of a block row;
         the PetscCheck on nmax below enforces the 4096 limit */
      MatScalar *aa,val,AA[4096];
      PetscInt  *aj,*ai,AJ[4096],nc;
      if (isseqaij) { a = Amat; b = NULL; }
      else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
        a = d->A; b = d->B;
      }
      PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row for preallocation, verifying the dense-block assumption */
      for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
        PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
        const PetscInt *cols;
        for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
          nnz[brow/bs] = jj/bs;
          if (jj%bs) ok = 0; /* row length not a multiple of bs: blocks cannot all be dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
          if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
          for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
            if (jj%bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* pattern must match the block's first row */
            if (nnz[brow/bs] != jj/bs) ok = 0;
            PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz,o_nnz));
            goto old_bs; /* fall back to the general (slower, exact) construction */
          }
        }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      // diag
      for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
        ai = aseq->i;
        n  = ai[brow+1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k=0; k<n; k += bs) { // block columns
          AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
          val = 0;
          for (int ii=0; ii<bs; ii++) { // rows in block
            aa = aseq->a + ai[brow+ii] + k; /* dense blocks => same column offset k on every row of the block */
            for (int jj=0; jj<bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k/bs] = val;
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
        for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
          /* first pass over the block's first row: compute global block column ids via garray */
          PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
          for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
            AA[k/bs] = 0;
            AJ[cidx] = garray[cols[k]]/bs; /* local off-diag col -> global col -> global block col */
          }
          nc = ncols/bs;
          PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
          /* second pass: accumulate the block norms over all bs rows of the block */
          for (int ii=0; ii<bs; ii++) { // rows in block
            PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
            for (int k=0; k<ncols; k += bs) {
              for (int jj=0; jj<bs; jj++) { // cols in block
                AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
              }
            }
            PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
          }
          grow = Istart/bs + brow/bs;
          PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
      old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
      } else if (ismpiaij) {
        Mat            Daij,Oaij;
        const PetscInt *garray;
        PetscInt       max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          }
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; /* clamp to # of off-proc block columns */
        }
      } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      /* accumulate |a_ij| into the (i/bs, j/bs) entry of the scalar graph */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii/bs;
        PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
        for (jj=0; jj<ncols; jj++) {
          PetscInt    dest_col = idx[jj]/bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
     procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) { a = Gmat; b = NULL; }
    else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
      a = d->A; b = d->B;
    }
    /* abs */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      MatInfo     info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(c,&avals));
      for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c,&avals));
    }
  }
  if (symmetrize) {
    PetscBool issym;
    PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
    if (!issym) {
      Mat matTrans;
      /* G <- G + G^T so the graph is symmetric even if Amat is not */
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec               diag;
    /* symmetric diagonal scaling: G <- D^{-1/2} G D^{-1/2} with D = diag(G) */
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}
7777 
7778 /* -------------------------------------------------------------------------- */
7779 /*@C
   MatFilter_AIJ - filter out (drop) matrix entries with small absolute values
     When vfilter < 0 this routine does nothing, so it should not be called in that case.
7782 
7783    Collective on Mat
7784 
7785    Input Parameters:
+   Gmat - the graph
-   vfilter - threshold parameter [0,1)

   Output Parameter:
.   filteredG - output filtered scalar graph
7791 
7792    Level: developer
7793 
7794    Notes:
    This is called before graph coarseners are called.
    This could go into Mat, move 'symm' to GAMG
7797 
7798 .seealso: `PCGAMGSetThreshold()`
7799 @*/
7800 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7801 {
7802   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7803   Mat               tGmat;
7804   MPI_Comm          comm;
7805   const PetscScalar *vals;
7806   const PetscInt    *idx;
7807   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7808   MatScalar         *AA; // this is checked in graph
7809   PetscBool         isseqaij;
7810   Mat               a, b, c;
7811   MatType           jtype;
7812 
7813   PetscFunctionBegin;
7814   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7815   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7816   PetscCall(MatGetType(Gmat,&jtype));
7817   PetscCall(MatCreate(comm, &tGmat));
7818   PetscCall(MatSetType(tGmat, jtype));
7819 
7820   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7821                Also, if the matrix is symmetric, can we skip this
7822                operation? It can be very expensive on large matrices. */
7823 
7824   // global sizes
7825   PetscCall(MatGetSize(Gmat, &MM, &NN));
7826   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7827   nloc = Iend - Istart;
7828   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7829   if (isseqaij) { a = Gmat; b = NULL; }
7830   else {
7831     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7832     a = d->A; b = d->B;
7833     garray = d->garray;
7834   }
7835   /* Determine upper bound on non-zeros needed in new filtered matrix */
7836   for (PetscInt row=0; row < nloc; row++) {
7837     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7838     d_nnz[row] = ncols;
7839     if (ncols>maxcols) maxcols=ncols;
7840     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7841   }
7842   if (b) {
7843     for (PetscInt row=0; row < nloc; row++) {
7844       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7845       o_nnz[row] = ncols;
7846       if (ncols>maxcols) maxcols=ncols;
7847       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7848     }
7849   }
7850   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7851   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7852   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7853   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7854   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7855   PetscCall(PetscFree2(d_nnz,o_nnz));
7856   //
7857   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7858   nnz0 = nnz1 = 0;
7859   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7860     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7861       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7862       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7863         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7864         if (PetscRealPart(sv) > vfilter) {
7865           nnz1++;
7866           PetscInt cid = idx[jj] + Istart; //diag
7867           if (c!=a) cid = garray[idx[jj]];
7868           AA[ncol_row] = vals[jj];
7869           AJ[ncol_row] = cid;
7870           ncol_row++;
7871         }
7872       }
7873       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7874       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7875     }
7876   }
7877   PetscCall(PetscFree2(AA,AJ));
7878   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7879   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7880   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7881 
7882   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7883                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7884                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7885 
7886   *filteredG = tGmat;
7887   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7888   PetscFunctionReturn(0);
7889 }
7890 
7891 /*
7892     Special version for direct calls from Fortran
7893 */
7894 #include <petsc/private/fortranimpl.h>
7895 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: both macros below rely on a PetscErrorCode *_ierr parameter being in
   scope in the calling function (as in matsetvaluesmpiaij_() below) */
#undef  PetscCall
#define PetscCall(...) do {                                                                    \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                                                  \
    }                                                                                          \
  } while (0)

/* Like the standard SETERRQ(), but reports the error through *_ierr and does a
   plain return, as required for the void Fortran stub below */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                                            \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                                                    \
  } while (0)

/* Map the C symbol to the name-mangling convention of the Fortran compiler */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran stub that inlines the MATMPIAIJ MatSetValues()
  path, avoiding the overhead of the generic Fortran-to-C binding.

  All arguments arrive by reference (Fortran convention) and are dereferenced
  immediately. Errors are reported through *_ierr followed by a plain return,
  via the PetscCall()/SETERRQ() macros redefined above for void functions.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat          mat  = *mmat;
  PetscInt     m    = *mm, n = *mn;
  InsertMode   addv = *maddv;
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  /* ADD_VALUES and INSERT_VALUES may not be mixed within one assembly cycle */
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch variables referenced by name inside MatSetValues_SeqAIJ_{A,B}_Private();
       do not rename or remove them even if they look unused here */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      /* negative row indices are silently skipped (standard MatSetValues convention) */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the per-row state (row pointers, capacities,
           search bounds) for the diagonal (1) and off-diagonal (2) blocks, as
           expected by the insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* pick the value according to row- or column-major input layout */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are skipped */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* column falls in the off-diagonal block B: translate the global
               column index to B's compressed local index via the column map */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* unseen off-diagonal column and new nonzeros are allowed:
                   revert the matrix to its unassembled form so it can grow */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values; they are communicated
           during MatAssemblyBegin/End */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
8032 
8033 /* Undefining these here since they were redefined from their original definition above! No
8034  * other PETSc functions should be defined past this point, as it is impossible to recover the
8035  * original definitions */
8036 #undef PetscCall
8037 #undef SETERRQ
8038