xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/* MatGetRowIJ_MPIAIJ - Produces the CSR row/column index arrays for the locally owned rows by
   first merging the diagonal and off-diagonal parts into one sequential matrix.

   The merged matrix B is composed onto A under the key "MatGetRowIJ_MPIAIJ" so that
   MatRestoreRowIJ_MPIAIJ() can query it back; composing bumps B's reference count, so the
   MatDestroy() below only drops this routine's reference and B (and the ia/ja arrays handed
   out) stay alive until the matching restore. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  /* Build a sequential matrix from the local rows of A (diagonal plus off-diagonal part) */
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
22 {
23   Mat            B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
27   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
28   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
36    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
43 
44   Developer Notes:
45     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
46    enough exist.
47 
48   Level: beginner
49 
50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
57    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
58    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
64 
65   Level: beginner
66 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
/* MatBindToCPU_MPIAIJ - Binds (flg == PETSC_TRUE) or unbinds the parallel matrix and all of its
   internal pieces to/from the CPU. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  /* The flag is only recorded when a GPU backend is configured; otherwise everything is on the CPU anyway */
  A->boundtocpu = flg;
#endif
  /* The diagonal (A) and off-diagonal (B) blocks may not exist yet before preallocation/assembly */
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));

  PetscFunctionReturn(0);
}
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
97     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
/* MatFindNonzeroRows_MPIAIJ - Builds an index set of the locally owned rows that contain at
   least one stored nonzero value (entries that are stored but numerically zero do not count,
   except that a completely empty row is counted as zero).

   Output: *keptrows is left NULL when no process has a zero row (i.e. all rows are kept);
   otherwise it holds the global indices of the rows to keep. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* First pass: count the local rows that are entirely zero (cnt) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      /* structurally empty row */
      cnt++;
      goto ok1;
    }
    /* any nonzero stored value in the diagonal block keeps the row */
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    /* ... or in the off-diagonal block */
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* Collective decision: if no process found a zero row, return with *keptrows == NULL */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect the global indices of the rows that are NOT entirely zero */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
170 
171 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
172 {
173   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
174   PetscBool         cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y,&cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A,D,is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y,D,is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
189   PetscInt       i,rstart,nrows,*rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
194   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
195   for (i=0; i<nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
197   PetscFunctionReturn(0);
198 }
199 
/* MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1/2/inf norm, sum or mean
   of real/imaginary parts) over all rows of the parallel matrix; reductions[] must have
   length N (global number of columns) and is filled redundantly on every process.

   Local contributions are accumulated into work[] indexed by GLOBAL column: diagonal-block
   columns are offset by cmap->rstart, off-diagonal columns are translated through garray.
   A collective MPI reduction (MAX for the infinity norm, SUM otherwise) then combines them. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* Get/restore the value arrays without using them; presumably this forces any device copy
     to be synchronized to the host before a_aij->a / b_aij->a are read directly below —
     TODO confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column (note |a*a| == |a|^2 also for complex scalars) */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    /* per-column maximum of absolute values */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process contributions; MAX for the infinity norm, SUM for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
265 
266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
267 {
268   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
269   IS              sis,gis;
270   const PetscInt  *isis,*igis;
271   PetscInt        n,*iis,nsis,ngis,rstart,i;
272 
273   PetscFunctionBegin;
274   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
275   PetscCall(MatFindNonzeroRows(a->B,&gis));
276   PetscCall(ISGetSize(gis,&ngis));
277   PetscCall(ISGetSize(sis,&nsis));
278   PetscCall(ISGetIndices(sis,&isis));
279   PetscCall(ISGetIndices(gis,&igis));
280 
281   PetscCall(PetscMalloc1(ngis+nsis,&iis));
282   PetscCall(PetscArraycpy(iis,igis,ngis));
283   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
284   n    = ngis + nsis;
285   PetscCall(PetscSortRemoveDupsInt(&n,iis));
286   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
287   for (i=0; i<n; i++) iis[i] += rstart;
288   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
289 
290   PetscCall(ISRestoreIndices(sis,&isis));
291   PetscCall(ISRestoreIndices(gis,&igis));
292   PetscCall(ISDestroy(&sis));
293   PetscCall(ISDestroy(&gis));
294   PetscFunctionReturn(0);
295 }
296 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but access is fast.
*/
/* MatCreateColmap_MPIAIJ_Private - Builds aij->colmap, translating a global column number into
   1 + (local column in the off-diagonal block B); a lookup of 0 (hash table) respectively a
   stored 0 (plain array) therefore means "column not present in B".

   With PETSC_USE_CTABLE the map is a hash table keyed by global column + 1; otherwise it is a
   dense length-N array (not scalable in memory, but O(1) access). */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  /* garray (global columns of B) must exist whenever B has columns */
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are shifted by +1 because 0 is the table's "not found" sentinel */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense map: colmap[global col] == local col + 1, zero-initialized so 0 means "absent" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
323 
/*
  MatSetValues_SeqAIJ_A_Private - Macro that inserts or adds one (row,col,value) entry into the
  diagonal block A of the MPIAIJ matrix. It relies on caller-provided state (set up in
  MatSetValues_MPIAIJ): the row's column indices rp1, values ap1, current length nrow1,
  capacity rmax1, and the binary-search window low1/high1 with lastcol1 caching the previous
  column to keep the window monotone for sorted input. If the entry is structurally new, the
  row storage may be reallocated via MatSeqXAIJReallocateAIJ() and later entries are shifted
  up; zero values can be skipped when ignorezeroentries is set (diagonal entries are always
  kept), and nonew controls whether new nonzeros are silently dropped (1) or an error (-1).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
360 
/*
  MatSetValues_SeqAIJ_B_Private - Macro that inserts or adds one (row,col,value) entry into the
  off-diagonal block B of the MPIAIJ matrix; mirrors MatSetValues_SeqAIJ_A_Private but uses the
  B-side caller state (rp2/ap2/nrow2/rmax2/low2/high2/lastcol2) and the b_noinsert label.
  Unlike the A variant, zero values are skipped whenever ignorezeroentries is set (no diagonal
  exception, since diagonal entries never land in B).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
396 
/* MatSetValuesRow_MPIAIJ - Overwrites the values of one locally owned row, where v[] contains
   the values of ALL stored entries of that row ordered by global column number (left-of-diagonal
   off-block entries, then the diagonal block, then right-of-diagonal off-block entries).

   row is a GLOBAL row index; it is converted to local below. The existing nonzero structure is
   reused unchanged — only values are copied. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  /* note: `diag` is the ownership-range start, used both as row offset and as the global
     column where the diagonal block begins (hence the square-matrix restriction above) */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* copy the first l entries of v into the left part of the off-diagonal block's row */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
434 
/* MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of values v into the
   parallel matrix. Locally owned rows are routed to the diagonal block (columns in
   [cstart,cend)) or the off-diagonal block via the inline macros; off-process rows are
   stashed for communication during assembly. Negative row/column indices are skipped. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state consumed by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v may be NULL (structure-only insertion); value then stays 0.0 */
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column numbering; translate via colmap */
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero not allowed: skip (nonew==1) or error */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B still uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for exchange during MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}
541 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
548 {
549   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
550   Mat            A           = aij->A; /* diagonal part of the matrix */
551   Mat            B           = aij->B; /* offdiagonal part of the matrix */
552   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
553   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
554   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
555   PetscInt       *ailen      = a->ilen,*aj = a->j;
556   PetscInt       *bilen      = b->ilen,*bj = b->j;
557   PetscInt       am          = aij->A->rmap->n,j;
558   PetscInt       diag_so_far = 0,dnz;
559   PetscInt       offd_so_far = 0,onz;
560 
561   PetscFunctionBegin;
562   /* Iterate over all rows of the matrix */
563   for (j=0; j<am; j++) {
564     dnz = onz = 0;
565     /*  Iterate over all non-zero columns of the current row */
566     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
567       /* If column is in the diagonal */
568       if (mat_j[col] >= cstart && mat_j[col] < cend) {
569         aj[diag_so_far++] = mat_j[col] - cstart;
570         dnz++;
571       } else { /* off-diagonal entries */
572         bj[offd_so_far++] = mat_j[col];
573         onz++;
574       }
575     }
576     ailen[j] = dnz;
577     bilen[j] = onz;
578   }
579   PetscFunctionReturn(0);
580 }
581 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
589 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
590 {
591   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
592   Mat            A      = aij->A; /* diagonal part of the matrix */
593   Mat            B      = aij->B; /* offdiagonal part of the matrix */
594   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
595   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
596   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
598   PetscInt       *ailen = a->ilen,*aj = a->j;
599   PetscInt       *bilen = b->ilen,*bj = b->j;
600   PetscInt       am     = aij->A->rmap->n,j;
601   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
602   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
603   PetscScalar    *aa = a->a,*ba = b->a;
604 
605   PetscFunctionBegin;
606   /* Iterate over all rows of the matrix */
607   for (j=0; j<am; j++) {
608     dnz_row = onz_row = 0;
609     rowstart_offd = full_offd_i[j];
610     rowstart_diag = full_diag_i[j];
611     /*  Iterate over all non-zero columns of the current row */
612     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
613       /* If column is in the diagonal */
614       if (mat_j[col] >= cstart && mat_j[col] < cend) {
615         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
616         aa[rowstart_diag+dnz_row] = mat_a[col];
617         dnz_row++;
618       } else { /* off-diagonal entries */
619         bj[rowstart_offd+onz_row] = mat_j[col];
620         ba[rowstart_offd+onz_row] = mat_a[col];
621         onz_row++;
622       }
623     }
624     ailen[j] = dnz_row;
625     bilen[j] = onz_row;
626   }
627   PetscFunctionReturn(0);
628 }
629 
/* MatGetValues_MPIAIJ - Retrieves an m-by-n block of values into v (row-major). Only rows
   owned by this process are supported; negative row/column indices are skipped. Columns
   inside the owned range are read from the diagonal block, all others from the off-diagonal
   block via the colmap translation, with 0.0 returned for locations not stored in B. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal column: translate global -> local through colmap (built on demand) */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale/aliased colmap slot */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
/* MatAssemblyBegin_MPIAIJ - Starts the exchange of stashed off-process entries; returns
   immediately when no off-process entries can exist (donotstash / nooffprocentries). The
   matching receives are drained in MatAssemblyEnd_MPIAIJ(). */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
682 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly of a parallel AIJ matrix:
   receives and inserts the off-process entries communicated by
   MatAssemblyBegin_MPIAIJ(), then assembles the sequential diagonal (A)
   and off-diagonal (B) blocks and updates the global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Drain the stash: each message carries (row,col,val) triples destined for this rank */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* other_disassembled = logical AND of was_assembled over all ranks */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the data needed for parallel matrix-vector products */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* drop cached row work arrays; they are invalid after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
763 
764 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
765 {
766   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
767 
768   PetscFunctionBegin;
769   PetscCall(MatZeroEntries(l->A));
770   PetscCall(MatZeroEntries(l->B));
771   PetscFunctionReturn(0);
772 }
773 
/*
   MatZeroRows_MPIAIJ - Zeroes the locally owned rows among the global indices
   rows[], optionally placing diag on the diagonal, and (when x and b are
   given) fixing the right hand side so that x stays a solution on those rows.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* set b_i = diag * x_i for each zeroed local row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* record nonzero states so pattern changes can be detected afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the nonew flags; they are restored after the diagonal insertion below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows beyond the global column count have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  /* flush the MatSetValues() insertions above (collective) */
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
847 
/*
   MatZeroRowsColumns_MPIAIJ - Zeroes both the rows AND the columns given by
   the global indices rows[], placing diag on the diagonal of each zeroed row
   this rank owns; when x and b are supplied, b is adjusted so x remains a
   solution for the eliminated unknowns.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the ghost columns marking the zeroed global columns */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost values of x are needed to update b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* NOTE(review): bb and xx are only fetched when BOTH x and b are non-NULL,
     yet the updates below are guarded by 'if (b)' alone; if a caller could
     pass b without x this would read uninitialized pointers -- confirm the
     calling convention requires x whenever b is given */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
966 
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscInt       nt;
971   VecScatter     Mvctx = a->Mvctx;
972 
973   PetscFunctionBegin;
974   PetscCall(VecGetLocalSize(xx,&nt));
975   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
976   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
977   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
978   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
979   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986 
987   PetscFunctionBegin;
988   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   VecScatter     Mvctx = a->Mvctx;
996 
997   PetscFunctionBegin;
998   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
999   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1000   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1001   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1006 {
1007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1008 
1009   PetscFunctionBegin;
1010   /* do nondiagonal part */
1011   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1012   /* do local part */
1013   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1014   /* add partial results together */
1015   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1017   PetscFunctionReturn(0);
1018 }
1019 
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tol: first a cheap test on the diagonal blocks (reduced across all
   ranks for an early exit), then a comparison of the off-process parts via
   submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* every rank must agree; bail out as soon as any diagonal block fails */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme[] lists every global index outside this rank's ownership range.
     NOTE(review): the allocation is sized with N but the second fill loop
     runs to M; these agree only when Amat is square -- confirm this routine
     is only reached with square matrices */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me) */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1060 
1061 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1062 {
1063   PetscFunctionBegin;
1064   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /*
1084   This only works correctly for square matrices where the subblock A->A is the
1085    diagonal block
1086 */
1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090 
1091   PetscFunctionBegin;
1092   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1093   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1094   PetscCall(MatGetDiagonal(a->A,v));
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(MatScale(a->A,aa));
1104   PetscCall(MatScale(a->B,aa));
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1109 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1110 {
1111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(PetscSFDestroy(&aij->coo_sf));
1115   PetscCall(PetscFree(aij->Aperm1));
1116   PetscCall(PetscFree(aij->Bperm1));
1117   PetscCall(PetscFree(aij->Ajmap1));
1118   PetscCall(PetscFree(aij->Bjmap1));
1119 
1120   PetscCall(PetscFree(aij->Aimap2));
1121   PetscCall(PetscFree(aij->Bimap2));
1122   PetscCall(PetscFree(aij->Aperm2));
1123   PetscCall(PetscFree(aij->Bperm2));
1124   PetscCall(PetscFree(aij->Ajmap2));
1125   PetscCall(PetscFree(aij->Bjmap2));
1126 
1127   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1128   PetscCall(PetscFree(aij->Cperm1));
1129   PetscFunctionReturn(0);
1130 }
1131 
/*
   MatDestroy_MPIAIJ - Releases everything owned by a parallel AIJ matrix:
   the stash, the sequential blocks A and B, the column map, ghost vector
   and scatter, COO data, and all composed functions/objects.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap storage depends on the PETSC_USE_CTABLE configure option */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  /* detach every composed method so no dangling pointers survive */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); /* NOTE(review): MatConvert_mpiaij_is_C is also cleared above; the repeat is harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}
1206 
1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1208 {
1209   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1210   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1211   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1212   const PetscInt    *garray = aij->garray;
1213   const PetscScalar *aa,*ba;
1214   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1215   PetscInt          *rowlens;
1216   PetscInt          *colidxs;
1217   PetscScalar       *matvals;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(PetscViewerSetUp(viewer));
1221 
1222   M  = mat->rmap->N;
1223   N  = mat->cmap->N;
1224   m  = mat->rmap->n;
1225   rs = mat->rmap->rstart;
1226   cs = mat->cmap->rstart;
1227   nz = A->nz + B->nz;
1228 
1229   /* write matrix header */
1230   header[0] = MAT_FILE_CLASSID;
1231   header[1] = M; header[2] = N; header[3] = nz;
1232   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1233   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1234 
1235   /* fill in and store row lengths  */
1236   PetscCall(PetscMalloc1(m,&rowlens));
1237   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1238   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1239   PetscCall(PetscFree(rowlens));
1240 
1241   /* fill in and store column indices */
1242   PetscCall(PetscMalloc1(nz,&colidxs));
1243   for (cnt=0, i=0; i<m; i++) {
1244     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1245       if (garray[B->j[jb]] > cs) break;
1246       colidxs[cnt++] = garray[B->j[jb]];
1247     }
1248     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1249       colidxs[cnt++] = A->j[ja] + cs;
1250     for (; jb<B->i[i+1]; jb++)
1251       colidxs[cnt++] = garray[B->j[jb]];
1252   }
1253   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1254   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1255   PetscCall(PetscFree(colidxs));
1256 
1257   /* fill in and store nonzero values */
1258   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1259   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1260   PetscCall(PetscMalloc1(nz,&matvals));
1261   for (cnt=0, i=0; i<m; i++) {
1262     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1263       if (garray[B->j[jb]] > cs) break;
1264       matvals[cnt++] = ba[jb];
1265     }
1266     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1267       matvals[cnt++] = aa[ja];
1268     for (; jb<B->i[i+1]; jb++)
1269       matvals[cnt++] = ba[jb];
1270   }
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1272   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1273   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1274   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1275   PetscCall(PetscFree(matvals));
1276 
1277   /* write block size option to the viewer's .info file */
1278   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewing for ASCII, draw, socket and
   binary viewers. Special ASCII formats (load balance, info, info-detail,
   factor-info) are handled per-format and return early; all other cases
   gather the entire matrix onto rank 0 and view it there sequentially.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max of the per-rank local nonzero counts */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed synchronized in rank order */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) { /* NOTE(review): unreachable -- the iascii case is fully consumed by the first branch of this chain; kept as-is */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row/column; the other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1410 
1411 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1412 {
1413   PetscBool      iascii,isdraw,issocket,isbinary;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1419   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1420   if (iascii || isdraw || isbinary || issocket) {
1421     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (plus SOR_APPLY_UPPER and Eisenstat's trick)
   are supported: each outer iteration scatters the current iterate xx into the
   ghost vector mat->lvec, folds the off-process coupling into the right-hand
   side as bb1 = bb - B*x, and then runs the sequential SOR kernel of the
   diagonal block mat->A.  A true (globally coupled) parallel SOR is not
   implemented and yields an error.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* work vector: right-hand side minus off-process contribution */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* applies only the upper-triangular part of the diagonal block; no communication needed */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless a single zero-initial-guess sweep makes B*x identically zero */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 the off-process term vanishes, so the first sweep uses bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat's trick: one backward then one forward local sweep, combining
       the two half-iterates; see SOR_EISENSTAT in the MatSORType manual page */
    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal; reused across calls */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + (omega-2)/omega * D*x */
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1524 
/*
   MatPermute_MPIAIJ - Forms B = P_r * A * P_c for row/column permutations
   given as index sets rowp and colp (each entry i of the IS names the source
   row/column that lands at global position i).

   Strategy: star-forests (PetscSF) are used to invert the permutations in
   parallel, producing for every locally owned row/column its destination
   global index (rdest/cdest), and to translate the destinations of the
   ghost columns referenced by the off-diagonal block (gcdest).  Exact
   diagonal/off-diagonal nonzero counts are then broadcast to the receiving
   ranks so the permuted matrix can be preallocated precisely before the
   values are inserted with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  /* work is shared by the row and column phases, hence sized max(m,n) */
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  /* aA = diagonal block, aB = off-diagonal block, gcols = global ghost column indices */
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal (dnnz) and off-diagonal (onnz) nonzeros per SOURCE row,
     then broadcast them to the rank that owns the DESTINATION row (tdnnz/tonnz) */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     appears to be dead code left from an earlier version — confirm */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1630 
1631 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1632 {
1633   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1634 
1635   PetscFunctionBegin;
1636   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1637   if (ghosts) *ghosts = aij->garray;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1642 {
1643   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1644   Mat            A    = mat->A,B = mat->B;
1645   PetscLogDouble isend[5],irecv[5];
1646 
1647   PetscFunctionBegin;
1648   info->block_size = 1.0;
1649   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1650 
1651   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1652   isend[3] = info->memory;  isend[4] = info->mallocs;
1653 
1654   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1655 
1656   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1657   isend[3] += info->memory;  isend[4] += info->mallocs;
1658   if (flag == MAT_LOCAL) {
1659     info->nz_used      = isend[0];
1660     info->nz_allocated = isend[1];
1661     info->nz_unneeded  = isend[2];
1662     info->memory       = isend[3];
1663     info->mallocs      = isend[4];
1664   } else if (flag == MAT_GLOBAL_MAX) {
1665     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   } else if (flag == MAT_GLOBAL_SUM) {
1673     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   }
1681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1682   info->fill_ratio_needed = 0;
1683   info->factor_mallocs    = 0;
1684   PetscFunctionReturn(0);
1685 }
1686 
/*
   MatSetOption_MPIAIJ - Records a matrix option, forwarding it to the two
   sequential blocks (a->A diagonal, a->B off-diagonal), storing it in the
   parallel data structure, or ignoring it, depending on the option.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options applied verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* also cached here so the parallel MatSetValues path interprets input arrays correctly */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, values destined for other ranks are dropped instead of stashed */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1740 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix in
   global column numbering, with columns sorted increasingly.

   The row is assembled by merging the corresponding rows of the diagonal
   block (A, columns offset by cmap->rstart) and the off-diagonal block
   (B, compressed columns mapped to global indices via garray).  Because both
   block rows are already sorted, the merge reduces to finding imark, the
   number of B entries whose global column precedes the diagonal range; those
   go first, then all A entries, then the remaining B entries.  Output arrays
   are persistent per-matrix work buffers (rowvalues/rowindices); only one
   row may be "gotten" at a time, enforced by the getrowactive flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy the B entries whose global column lies left of the diagonal range */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested: determine imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1817 
1818 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1819 {
1820   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1821 
1822   PetscFunctionBegin;
1823   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1824   aij->getrowactive = PETSC_FALSE;
1825   PetscFunctionReturn(0);
1826 }
1827 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm of a parallel AIJ matrix by combining the
   raw CSR data of the diagonal (A) and off-diagonal (B) blocks and reducing
   across the communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-rank communicator: everything lives in the diagonal block */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then sqrt of the global sum */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (length cmap->N), reduce, take max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block columns are compressed; garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each row is wholly owned locally, so only a max-reduction is needed */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1897 
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   For MAT_INITIAL_MATRIX (and in-place transposes) the result is
   preallocated exactly: d_nnz comes from column counts of the diagonal
   block, and o_nnz is obtained by summing the off-diagonal column counts
   onto their owning ranks through a PetscSF over garray.  The diagonal
   block is then transposed locally (no MatSetValues), while the
   off-diagonal entries are inserted column-row-swapped via MatSetValues,
   which routes them to the correct destination ranks during assembly.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*matout));
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has the column layout of A as its row layout and vice versa */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    /* reusing an existing transpose: the pattern must already be correct */
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag,*B_diag));
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global indices */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of a->B as COLUMN `row` of the transpose */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: fold B's contents into A's header */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1987 
1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1989 {
1990   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1991   Mat            a    = aij->A,b = aij->B;
1992   PetscInt       s1,s2,s3;
1993 
1994   PetscFunctionBegin;
1995   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1996   if (rr) {
1997     PetscCall(VecGetLocalSize(rr,&s1));
1998     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1999     /* Overlap communication with computation. */
2000     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2001   }
2002   if (ll) {
2003     PetscCall(VecGetLocalSize(ll,&s1));
2004     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2005     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2006   }
2007   /* scale  the diagonal block */
2008   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2009 
2010   if (rr) {
2011     /* Do a scatter end and then right scale the off-diagonal block */
2012     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2013     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2014   }
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2019 {
2020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2021 
2022   PetscFunctionBegin;
2023   PetscCall(MatSetUnfactored(a->A));
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2028 {
2029   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2030   Mat            a,b,c,d;
2031   PetscBool      flg;
2032 
2033   PetscFunctionBegin;
2034   a = matA->A; b = matA->B;
2035   c = matB->A; d = matB->B;
2036 
2037   PetscCall(MatEqual(a,c,&flg));
2038   if (flg) {
2039     PetscCall(MatEqual(b,d,&flg));
2040   }
2041   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2046 {
2047   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2048   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2049 
2050   PetscFunctionBegin;
2051   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2052   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2053     /* because of the column compression in the off-processor part of the matrix a->B,
2054        the number of columns in a->B and b->B may be different, hence we cannot call
2055        the MatCopy() directly on the two parts. If need be, we can provide a more
2056        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2057        then copying the submatrices */
2058     PetscCall(MatCopy_Basic(A,B,str));
2059   } else {
2060     PetscCall(MatCopy(a->A,b->A,str));
2061     PetscCall(MatCopy(a->B,b->B,str));
2062   }
2063   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2068 {
2069   PetscFunctionBegin;
2070   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 /*
2075    Computes the number of nonzeros per row needed for preallocation when X and Y
2076    have different nonzero structure.
2077 */
2078 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2079 {
2080   PetscInt       i,j,k,nzx,nzy;
2081 
2082   PetscFunctionBegin;
2083   /* Set the number of nonzeros in the new matrix */
2084   for (i=0; i<m; i++) {
2085     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2086     nzx = xi[i+1] - xi[i];
2087     nzy = yi[i+1] - yi[i];
2088     nnz[i] = 0;
2089     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2090       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2091       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2092       nnz[i]++;
2093     }
2094     for (; k<nzy; k++) nnz[i]++;
2095   }
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2101 {
2102   PetscInt       m = Y->rmap->N;
2103   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2104   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2105 
2106   PetscFunctionBegin;
2107   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2108   PetscFunctionReturn(0);
2109 }
2110 
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN works blockwise on the matching sequential parts;
   SUBSET_NONZERO_PATTERN defers to the generic implementation; for unrelated
   patterns a new matrix with the union pattern is preallocated exactly,
   filled, and then swapped into Y's header so the caller's handle survives.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* per-row counts of the union pattern for the diagonal and off-diagonal blocks */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    /* off-diagonal blocks are compared in global column numbering via garray */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* replace Y's internals with B's; the user's Y handle remains valid */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2141 
2142 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2143 
2144 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2145 {
2146   PetscFunctionBegin;
2147   if (PetscDefined(USE_COMPLEX)) {
2148     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2149 
2150     PetscCall(MatConjugate_SeqAIJ(aij->A));
2151     PetscCall(MatConjugate_SeqAIJ(aij->B));
2152   }
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2157 {
2158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2159 
2160   PetscFunctionBegin;
2161   PetscCall(MatRealPart(a->A));
2162   PetscCall(MatRealPart(a->B));
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169 
2170   PetscFunctionBegin;
2171   PetscCall(MatImaginaryPart(a->A));
2172   PetscCall(MatImaginaryPart(a->B));
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2177 {
2178   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2179   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2180   PetscScalar       *va,*vv;
2181   Vec               vB,vA;
2182   const PetscScalar *vb;
2183 
2184   PetscFunctionBegin;
2185   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2186   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2187 
2188   PetscCall(VecGetArrayWrite(vA,&va));
2189   if (idx) {
2190     for (i=0; i<m; i++) {
2191       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2192     }
2193   }
2194 
2195   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2196   PetscCall(PetscMalloc1(m,&idxb));
2197   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2198 
2199   PetscCall(VecGetArrayWrite(v,&vv));
2200   PetscCall(VecGetArrayRead(vB,&vb));
2201   for (i=0; i<m; i++) {
2202     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2203       vv[i] = vb[i];
2204       if (idx) idx[i] = a->garray[idxb[i]];
2205     } else {
2206       vv[i] = va[i];
2207       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2208         idx[i] = a->garray[idxb[i]];
2209     }
2210   }
2211   PetscCall(VecRestoreArrayWrite(vA,&vv));
2212   PetscCall(VecRestoreArrayWrite(vA,&va));
2213   PetscCall(VecRestoreArrayRead(vB,&vb));
2214   PetscCall(PetscFree(idxb));
2215   PetscCall(VecDestroy(&vA));
2216   PetscCall(VecDestroy(&vB));
2217   PetscFunctionReturn(0);
2218 }
2219 
/* For each local row, compute the entry of minimum absolute value, counting the
   IMPLICIT zeros that the compressed off-diagonal block B does not store.
   v receives the value; idx[] (if non-NULL) the global column of that value. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;  /* per-row argmin of each block */
  Vec               diagV, offdiagV;        /* per-row min of each block */
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty here: delegate entirely to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: each row consists of implicit zeros only */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;    /* walking pointer over B's values */
  bi   = b->i;   /* CSR row offsets of B */
  bj   = b->j;   /* CSR (compressed) column indices of B */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit zero exists, so the row's minimum |value| is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the hole test compares the POSITION j against cstart; presumably
         correct by construction of the compressed column map -- verify against the
         identical logic in MatGetRowMin_MPIAIJ/MatGetRowMax_MPIAIJ before changing */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty B row: first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row; keep the smallest |value| seen */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2327 
/* For each local row, compute the minimum entry (ordering by real part),
   counting the IMPLICIT zeros not stored in the compressed off-diagonal block B.
   v receives the value; idx[] (if non-NULL) the global column of that value. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;  /* per-row argmin of each block */
  Vec               diagV, offdiagV;        /* per-row min of each block */
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty here: delegate entirely to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;    /* walking pointer over B's values */
  bi   = b->i;   /* CSR row offsets of B */
  bj   = b->j;   /* CSR (compressed) column indices of B */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit zero exists, so the row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the hole test compares the POSITION j against cstart; presumably
         correct by construction of the compressed column map -- verify against the
         identical logic in MatGetRowMinAbs_MPIAIJ/MatGetRowMax_MPIAIJ before changing */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty B row: first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row; keep the smallest value (by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2435 
/* For each local row, compute the maximum entry (ordering by real part),
   counting the IMPLICIT zeros not stored in the compressed off-diagonal block B.
   v receives the value; idx[] (if non-NULL) the global column of that value. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;  /* per-row argmax of each block */
  Vec               diagV, offdiagV;        /* per-row max of each block */
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty here: delegate entirely to the sequential diagonal block */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;    /* walking pointer over B's values */
  bi   = b->i;   /* CSR row offsets of B */
  bj   = b->j;   /* CSR (compressed) column indices of B */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the hole test compares the POSITION j against cstart; presumably
         correct by construction of the compressed column map -- verify against the
         identical logic in MatGetRowMin_MPIAIJ/MatGetRowMinAbs_MPIAIJ before changing */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty B row: first off-diagonal column is either 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row; keep the largest value (by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block maxima; ties pick the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2543 
2544 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2545 {
2546   Mat            *dummy;
2547 
2548   PetscFunctionBegin;
2549   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2550   *newmat = *dummy;
2551   PetscCall(PetscFree(dummy));
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2556 {
2557   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2558 
2559   PetscFunctionBegin;
2560   PetscCall(MatInvertBlockDiagonal(a->A,values));
2561   A->factorerrortype = a->A->factorerrortype;
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2566 {
2567   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2568 
2569   PetscFunctionBegin;
2570   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2571   PetscCall(MatSetRandom(aij->A,rctx));
2572   if (x->assembled) {
2573     PetscCall(MatSetRandom(aij->B,rctx));
2574   } else {
2575     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2576   }
2577   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2578   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2583 {
2584   PetscFunctionBegin;
2585   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2586   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 /*@
2591    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2592 
2593    Not collective
2594 
2595    Input Parameter:
2596 .    A - the matrix
2597 
2598    Output Parameter:
2599 .    nz - the number of nonzeros
2600 
2601  Level: advanced
2602 
2603 @*/
2604 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
2605 {
2606   Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
2607   Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;
2608 
2609   PetscFunctionBegin;
2610   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 /*@
2615    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2616 
2617    Collective on Mat
2618 
2619    Input Parameters:
2620 +    A - the matrix
2621 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2622 
2623  Level: advanced
2624 
2625 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if registered; no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2632 
2633 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2634 {
2635   PetscBool            sc = PETSC_FALSE,flg;
2636 
2637   PetscFunctionBegin;
2638   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2639   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2640   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2641   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2642   PetscOptionsHeadEnd();
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2647 {
2648   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2649   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2650 
2651   PetscFunctionBegin;
2652   if (!Y->preallocated) {
2653     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2654   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2655     PetscInt nonew = aij->nonew;
2656     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2657     aij->nonew = nonew;
2658   }
2659   PetscCall(MatShift_Basic(Y,a));
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2664 {
2665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2666 
2667   PetscFunctionBegin;
2668   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2669   PetscCall(MatMissingDiagonal(a->A,missing,d));
2670   if (d) {
2671     PetscInt rstart;
2672     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2673     *d += rstart;
2674 
2675   }
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2680 {
2681   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2682 
2683   PetscFunctionBegin;
2684   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 /* -------------------------------------------------------------------*/
/* Virtual-function table installed on every MATMPIAIJ matrix; the numbered
   markers give the slot index within struct _MatOps.  NULL slots fall back to
   the generic Mat implementation or are unsupported for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                /*150*/NULL
};
2841 
2842 /* ----------------------------------------------------------------------------------------*/
2843 
2844 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847 
2848   PetscFunctionBegin;
2849   PetscCall(MatStoreValues(aij->A));
2850   PetscCall(MatStoreValues(aij->B));
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2855 {
2856   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2857 
2858   PetscFunctionBegin;
2859   PetscCall(MatRetrieveValues(aij->A));
2860   PetscCall(MatRetrieveValues(aij->B));
2861   PetscFunctionReturn(0);
2862 }
2863 
/* Preallocate the MPIAIJ matrix: d_nz/d_nnz size the diagonal block (columns owned
   by this process), o_nz/o_nnz size the off-diagonal block.  Any existing assembly
   data (column map, ghost list, scatter) is discarded and rebuilt at assembly. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* drop the global-to-local column map in whichever representation it uses */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* a uniprocessor run has no off-process columns, so the off-diagonal block is 0-wide */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes, so it only needs creating the first time */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2907 
/* Reset the matrix to its freshly-preallocated state, keeping the existing
   nonzero-structure sizing of both blocks but discarding assembly data. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* drop the global-to-local column map in whichever representation it uses */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* reset the two sequential blocks in place; their preallocation is retained */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2934 
/* Duplicate an MPIAIJ matrix: creates a new matrix with the same sizes, block sizes and
   type, copies the MPIAIJ bookkeeping (colmap, garray, lvec, Mvctx) when present, and
   duplicates the sequential diagonal (A) and off-diagonal (B) blocks, copying values or
   not according to cpvalues. The duplicate shares the row/column layouts by reference. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share layouts by reference rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* copy the global-to-local column map of the off-diagonal part, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the list of global indices of off-process columns, if it exists */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3000 
3001 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3002 {
3003   PetscBool      isbinary, ishdf5;
3004 
3005   PetscFunctionBegin;
3006   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3007   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3008   /* force binary viewer to load .info file if it has not yet done so */
3009   PetscCall(PetscViewerSetUp(viewer));
3010   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3011   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
3012   if (isbinary) {
3013     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
3014   } else if (ishdf5) {
3015 #if defined(PETSC_HAVE_HDF5)
3016     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
3017 #else
3018     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3019 #endif
3020   } else {
3021     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3022   }
3023   PetscFunctionReturn(0);
3024 }
3025 
/* Load an MPIAIJ matrix from a PETSc binary viewer. File layout read here:
   a 4-entry header (classid, M, N, nz), then M row lengths, then nz column
   indices, then nz scalar values. Each rank reads the portion matching its
   row layout via PetscViewerBinaryReadAll(). Collective on the viewer's
   communicator. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special on-disk format (e.g. dense) that this reader cannot handle */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths in place to obtain local CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the row lengths over all ranks must account for every stored nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3072 
3073 /* Not scalable because of ISAllGather() unless getting all columns. */
3074 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3075 {
3076   IS             iscol_local;
3077   PetscBool      isstride;
3078   PetscMPIInt    lisstride=0,gisstride;
3079 
3080   PetscFunctionBegin;
3081   /* check if we are grabbing all columns*/
3082   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3083 
3084   if (isstride) {
3085     PetscInt  start,len,mstart,mlen;
3086     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3087     PetscCall(ISGetLocalSize(iscol,&len));
3088     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3089     if (mstart == start && mlen-mstart == len) lisstride = 1;
3090   }
3091 
3092   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3093   if (gisstride) {
3094     PetscInt N;
3095     PetscCall(MatGetSize(mat,NULL,&N));
3096     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3097     PetscCall(ISSetIdentity(iscol_local));
3098     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3099   } else {
3100     PetscInt cbs;
3101     PetscCall(ISGetBlockSize(iscol,&cbs));
3102     PetscCall(ISAllGather(iscol,&iscol_local));
3103     PetscCall(ISSetBlockSize(iscol_local,cbs));
3104   }
3105 
3106   *isseq = iscol_local;
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 /*
3111  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3112  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3113 
3114  Input Parameters:
3115    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3118    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3119            i.e., mat->cstart <= iscol[i] < mat->cend
3120  Output Parameter:
3121    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3122    iscol_o - sequential column index set for retrieving mat->B
3123    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3124  */
/* Build the sequential index sets needed by MatCreateSubMatrix_MPIAIJ_SameRowColDist();
   see the preceding comment block for the parameter contract. The off-process column
   information is discovered by scattering two marker vectors through a->Mvctx, exactly
   the communication pattern of MatMult_MPIAIJ. Collective on mat. The caller frees
   *garray and destroys the three output ISs. */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: isstart = number of iscol entries on processes with lower rank */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  /* mark selected columns: x holds the global column index, cmap its position in iscol */
  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d: local column indices into the diagonal block; IS takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: isrow shifted to local row numbering */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: a received marker > -1 means that ghost column was selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 passes to the caller via *garray */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3221 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* On MAT_INITIAL_MATRIX, builds the submatrix from sequential submatrices of mat's
   diagonal and off-diagonal blocks and caches the index sets on the result for reuse;
   on MAT_REUSE_MATRIX, retrieves the cached index sets and updates *submat in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed during the initial call) */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    /* skip the off-diagonal update when no off-process columns are selected */
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; takes ownership of Asub, destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-walk the two sorted global column lists to keep only iscol_o entries
         that survived the column condensation in M's off-diagonal block */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composing takes a reference, so the local references can be dropped */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3316 
/* Top-level MPIAIJ submatrix extraction. Detects (collectively) whether isrow/iscol
   follow mat's row/column ownership, and dispatches to the specialized scalable
   implementations when they do; otherwise falls back to the nonscalable path that
   gathers iscol onto every process. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which path was taken initially is recorded by what was composed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* min/max inside the local ownership range implies all local indices are */
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the specialized paths require agreement on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered column IS on the result for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3420 
3421 /*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3423          and "off-diagonal" part of the matrix in CSR format.
3424 
3425    Collective
3426 
3427    Input Parameters:
3428 +  comm - MPI communicator
3429 .  A - "diagonal" portion of matrix
3430 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3431 -  garray - global index of B columns
3432 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3436 
3437    Notes:
3438        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3439        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3440 
3441 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3442 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;
  MatType           mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the diagonal blocks' local column counts */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
  PetscCall(MatSetType(*mat,mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* translate B's local (compacted) column indices in place to global indices via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat, reusing B's i/j/a arrays without copying */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* hand array ownership from B to Bnew: B must not free them when destroyed below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; assembly rebuilds the compact local column numbering */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3513 
3514 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3515 
3516 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3517 {
3518   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3519   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3520   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3521   Mat            M,Msub,B=a->B;
3522   MatScalar      *aa;
3523   Mat_SeqAIJ     *aij;
3524   PetscInt       *garray = a->garray,*colsub,Ncols;
3525   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3526   IS             iscol_sub,iscmap;
3527   const PetscInt *is_idx,*cmap;
3528   PetscBool      allcolumns=PETSC_FALSE;
3529   MPI_Comm       comm;
3530 
3531   PetscFunctionBegin;
3532   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3533   if (call == MAT_REUSE_MATRIX) {
3534     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3535     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3536     PetscCall(ISGetLocalSize(iscol_sub,&count));
3537 
3538     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3539     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3540 
3541     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3542     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3543 
3544     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3545 
3546   } else { /* call == MAT_INITIAL_MATRIX) */
3547     PetscBool flg;
3548 
3549     PetscCall(ISGetLocalSize(iscol,&n));
3550     PetscCall(ISGetSize(iscol,&Ncols));
3551 
3552     /* (1) iscol -> nonscalable iscol_local */
3553     /* Check for special case: each processor gets entire matrix columns */
3554     PetscCall(ISIdentity(iscol_local,&flg));
3555     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3556     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3557     if (allcolumns) {
3558       iscol_sub = iscol_local;
3559       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3560       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3561 
3562     } else {
3563       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3564       PetscInt *idx,*cmap1,k;
3565       PetscCall(PetscMalloc1(Ncols,&idx));
3566       PetscCall(PetscMalloc1(Ncols,&cmap1));
3567       PetscCall(ISGetIndices(iscol_local,&is_idx));
3568       count = 0;
3569       k     = 0;
3570       for (i=0; i<Ncols; i++) {
3571         j = is_idx[i];
3572         if (j >= cstart && j < cend) {
3573           /* diagonal part of mat */
3574           idx[count]     = j;
3575           cmap1[count++] = i; /* column index in submat */
3576         } else if (Bn) {
3577           /* off-diagonal part of mat */
3578           if (j == garray[k]) {
3579             idx[count]     = j;
3580             cmap1[count++] = i;  /* column index in submat */
3581           } else if (j > garray[k]) {
3582             while (j > garray[k] && k < Bn-1) k++;
3583             if (j == garray[k]) {
3584               idx[count]     = j;
3585               cmap1[count++] = i; /* column index in submat */
3586             }
3587           }
3588         }
3589       }
3590       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3591 
3592       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3593       PetscCall(ISGetBlockSize(iscol,&cbs));
3594       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3595 
3596       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3597     }
3598 
3599     /* (3) Create sequential Msub */
3600     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3601   }
3602 
3603   PetscCall(ISGetLocalSize(iscol_sub,&count));
3604   aij  = (Mat_SeqAIJ*)(Msub)->data;
3605   ii   = aij->i;
3606   PetscCall(ISGetIndices(iscmap,&cmap));
3607 
3608   /*
3609       m - number of local rows
3610       Ncols - number of columns (same on all processors)
3611       rstart - first row in new global matrix generated
3612   */
3613   PetscCall(MatGetSize(Msub,&m,NULL));
3614 
3615   if (call == MAT_INITIAL_MATRIX) {
3616     /* (4) Create parallel newmat */
3617     PetscMPIInt    rank,size;
3618     PetscInt       csize;
3619 
3620     PetscCallMPI(MPI_Comm_size(comm,&size));
3621     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3622 
3623     /*
3624         Determine the number of non-zeros in the diagonal and off-diagonal
3625         portions of the matrix in order to do correct preallocation
3626     */
3627 
3628     /* first get start and end of "diagonal" columns */
3629     PetscCall(ISGetLocalSize(iscol,&csize));
3630     if (csize == PETSC_DECIDE) {
3631       PetscCall(ISGetSize(isrow,&mglobal));
3632       if (mglobal == Ncols) { /* square matrix */
3633         nlocal = m;
3634       } else {
3635         nlocal = Ncols/size + ((Ncols % size) > rank);
3636       }
3637     } else {
3638       nlocal = csize;
3639     }
3640     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3641     rstart = rend - nlocal;
3642     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3643 
3644     /* next, compute all the lengths */
3645     jj    = aij->j;
3646     PetscCall(PetscMalloc1(2*m+1,&dlens));
3647     olens = dlens + m;
3648     for (i=0; i<m; i++) {
3649       jend = ii[i+1] - ii[i];
3650       olen = 0;
3651       dlen = 0;
3652       for (j=0; j<jend; j++) {
3653         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3654         else dlen++;
3655         jj++;
3656       }
3657       olens[i] = olen;
3658       dlens[i] = dlen;
3659     }
3660 
3661     PetscCall(ISGetBlockSize(isrow,&bs));
3662     PetscCall(ISGetBlockSize(iscol,&cbs));
3663 
3664     PetscCall(MatCreate(comm,&M));
3665     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3666     PetscCall(MatSetBlockSizes(M,bs,cbs));
3667     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3668     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3669     PetscCall(PetscFree(dlens));
3670 
3671   } else { /* call == MAT_REUSE_MATRIX */
3672     M    = *newmat;
3673     PetscCall(MatGetLocalSize(M,&i,NULL));
3674     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3675     PetscCall(MatZeroEntries(M));
3676     /*
3677          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3678        rather than the slower MatSetValues().
3679     */
3680     M->was_assembled = PETSC_TRUE;
3681     M->assembled     = PETSC_FALSE;
3682   }
3683 
3684   /* (5) Set values of Msub to *newmat */
3685   PetscCall(PetscMalloc1(count,&colsub));
3686   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3687 
3688   jj   = aij->j;
3689   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3690   for (i=0; i<m; i++) {
3691     row = rstart + i;
3692     nz  = ii[i+1] - ii[i];
3693     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3694     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3695     jj += nz; aa += nz;
3696   }
3697   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3698   PetscCall(ISRestoreIndices(iscmap,&cmap));
3699 
3700   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3701   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3702 
3703   PetscCall(PetscFree(colsub));
3704 
3705   /* save Msub, iscol_sub and iscmap used in processor for next request */
3706   if (call == MAT_INITIAL_MATRIX) {
3707     *newmat = M;
3708     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3709     PetscCall(MatDestroy(&Msub));
3710 
3711     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3712     PetscCall(ISDestroy(&iscol_sub));
3713 
3714     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3715     PetscCall(ISDestroy(&iscmap));
3716 
3717     if (iscol_local) {
3718       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3719       PetscCall(ISDestroy(&iscol_local));
3720     }
3721   }
3722   PetscFunctionReturn(0);
3723 }
3724 
3725 /*
3726     Not great since it makes two copies of the submatrix, first an SeqAIJ
3727   in local and then by concatenating the local matrices the end result.
3728   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3729 
3730   Note: This requires a sequential iscol with all indices.
3731 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;       /* M: parallel result; Mreuse: per-process sequential copy of the submatrix */
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns fast path is only valid if EVERY rank requested all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  /* (1) extract the requested rows/columns into a sequential matrix Mreuse on each process;
     on reuse, the previously composed "SubMatrix" is refilled in place */
  if (call ==  MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's [rstart,rend) column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns inside [rstart,rend) belong to the diagonal block, the rest to the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));  /* frees olens as well (same allocation) */
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* (2) copy the entries of the sequential Mreuse into the parallel matrix row by row */
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;   /* walk the CSR column and value arrays in lockstep */
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));  /* drop local reference; M now holds the only one */
  }
  PetscFunctionReturn(0);
}
3856 
3857 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3858 {
3859   PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
3860   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3861   const PetscInt *JJ;
3862   PetscBool      nooffprocentries;
3863   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;
3864 
3865   PetscFunctionBegin;
3866   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3867 
3868   PetscCall(PetscLayoutSetUp(B->rmap));
3869   PetscCall(PetscLayoutSetUp(B->cmap));
3870   m      = B->rmap->n;
3871   cstart = B->cmap->rstart;
3872   cend   = B->cmap->rend;
3873   rstart = B->rmap->rstart;
3874 
3875   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3876 
3877   if (PetscDefined(USE_DEBUG)) {
3878     for (i=0; i<m; i++) {
3879       nnz = Ii[i+1]- Ii[i];
3880       JJ  = J + Ii[i];
3881       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3882       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3883       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3884     }
3885   }
3886 
3887   for (i=0; i<m; i++) {
3888     nnz     = Ii[i+1]- Ii[i];
3889     JJ      = J + Ii[i];
3890     nnz_max = PetscMax(nnz_max,nnz);
3891     d       = 0;
3892     for (j=0; j<nnz; j++) {
3893       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3894     }
3895     d_nnz[i] = d;
3896     o_nnz[i] = nnz - d;
3897   }
3898   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3899   PetscCall(PetscFree2(d_nnz,o_nnz));
3900 
3901   for (i=0; i<m; i++) {
3902     ii   = i + rstart;
3903     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3904   }
3905   nooffprocentries    = B->nooffprocentries;
3906   B->nooffprocentries = PETSC_TRUE;
3907   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3908   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3909   B->nooffprocentries = nooffprocentries;
3910 
3911   /* count number of entries below block diagonal */
3912   PetscCall(PetscFree(Aij->ld));
3913   PetscCall(PetscCalloc1(m,&ld));
3914   Aij->ld = ld;
3915   for (i=0; i<m; i++) {
3916     nnz  = Ii[i+1] - Ii[i];
3917     j     = 0;
3918     while  (j < nnz && J[j] < cstart) {j++;}
3919     ld[i] = j;
3920     J     += nnz;
3921   }
3922 
3923   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*@
3928    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3929    (the default parallel PETSc format).
3930 
3931    Collective
3932 
3933    Input Parameters:
3934 +  B - the matrix
3935 .  i - the indices into j for the start of each local row (starts with zero)
3936 .  j - the column indices for each local row (starts with zero)
3937 -  v - optional values in the matrix
3938 
3939    Level: developer
3940 
3941    Notes:
3942        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3943      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3944      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3945 
3946        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3947 
3948        The format which is used for the sparse matrix input, is equivalent to a
3949     row-major ordering.. i.e for the following matrix, the input data expected is
3950     as shown
3951 
3952 $        1 0 0
3953 $        2 0 3     P0
3954 $       -------
3955 $        4 5 6     P1
3956 $
3957 $     Process0 [P0]: rows_owned=[0,1]
3958 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3959 $        j =  {0,0,2}  [size = 3]
3960 $        v =  {1,2,3}  [size = 3]
3961 $
3962 $     Process1 [P1]: rows_owned=[2]
3963 $        i =  {0,3}    [size = nrow+1  = 1+1]
3964 $        j =  {0,1,2}  [size = 3]
3965 $        v =  {4,5,6}  [size = 3]
3966 
3967 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3968           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3969 @*/
3970 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3971 {
3972   PetscFunctionBegin;
3973   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 /*@C
3978    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3979    (the default parallel PETSc format).  For good matrix assembly performance
3980    the user should preallocate the matrix storage by setting the parameters
3981    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3982    performance can be increased by more than a factor of 50.
3983 
3984    Collective
3985 
3986    Input Parameters:
3987 +  B - the matrix
3988 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3989            (same value is used for all local rows)
3990 .  d_nnz - array containing the number of nonzeros in the various rows of the
3991            DIAGONAL portion of the local submatrix (possibly different for each row)
3992            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3993            The size of this array is equal to the number of local rows, i.e 'm'.
3994            For matrices that will be factored, you must leave room for (and set)
3995            the diagonal entry even if it is zero.
3996 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3997            submatrix (same value is used for all local rows).
3998 -  o_nnz - array containing the number of nonzeros in the various rows of the
3999            OFF-DIAGONAL portion of the local submatrix (possibly different for
4000            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4001            structure. The size of this array is equal to the number
4002            of local rows, i.e 'm'.
4003 
4004    If the *_nnz parameter is given then the *_nz parameter is ignored
4005 
4006    The AIJ format (also called the Yale sparse matrix format or
4007    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4008    storage.  The stored row and column indices begin with zero.
4009    See Users-Manual: ch_mat for details.
4010 
4011    The parallel matrix is partitioned such that the first m0 rows belong to
4012    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4013    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4014 
4015    The DIAGONAL portion of the local submatrix of a processor can be defined
4016    as the submatrix which is obtained by extraction the part corresponding to
4017    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4018    first row that belongs to the processor, r2 is the last row belonging to
4019    the this processor, and c1-c2 is range of indices of the local part of a
4020    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4021    common case of a square matrix, the row and column ranges are the same and
4022    the DIAGONAL part is also square. The remaining portion of the local
4023    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4024 
4025    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4026 
4027    You can call MatGetInfo() to get information on how effective the preallocation was;
4028    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4029    You can also run with the option -info and look for messages with the string
4030    malloc in them to see if additional memory allocation was needed.
4031 
4032    Example usage:
4033 
4034    Consider the following 8x8 matrix with 34 non-zero values, that is
4035    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4036    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4037    as follows:
4038 
4039 .vb
4040             1  2  0  |  0  3  0  |  0  4
4041     Proc0   0  5  6  |  7  0  0  |  8  0
4042             9  0 10  | 11  0  0  | 12  0
4043     -------------------------------------
4044            13  0 14  | 15 16 17  |  0  0
4045     Proc1   0 18  0  | 19 20 21  |  0  0
4046             0  0  0  | 22 23  0  | 24  0
4047     -------------------------------------
4048     Proc2  25 26 27  |  0  0 28  | 29  0
4049            30  0  0  | 31 32 33  |  0 34
4050 .ve
4051 
4052    This can be represented as a collection of submatrices as:
4053 
4054 .vb
4055       A B C
4056       D E F
4057       G H I
4058 .ve
4059 
4060    Where the submatrices A,B,C are owned by proc0, D,E,F are
4061    owned by proc1, G,H,I are owned by proc2.
4062 
4063    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4064    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065    The 'M','N' parameters are 8,8, and have the same values on all procs.
4066 
4067    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4068    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4069    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4070    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4073 
4074    When d_nz, o_nz parameters are specified, d_nz storage elements are
4075    allocated for every row of the local diagonal submatrix, and o_nz
4076    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4079    In this case, the values of d_nz,o_nz are:
4080 .vb
4081      proc0 : dnz = 2, o_nz = 2
4082      proc1 : dnz = 3, o_nz = 2
4083      proc2 : dnz = 1, o_nz = 4
4084 .ve
4085    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4086    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4088    34 values.
4089 
4090    When d_nnz, o_nnz parameters are specified, the storage is specified
4091    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4092    In the above case the values for d_nnz,o_nnz are:
4093 .vb
4094      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4095      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4096      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4097 .ve
4098    Here the space allocated is sum of all the above values i.e 34, and
4099    hence pre-allocation is perfect.
4100 
4101    Level: intermediate
4102 
4103 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4104           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4105 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; silently a no-op for types without one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4114 
4115 /*@
4116      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4117          CSR format for the local rows.
4118 
4119    Collective
4120 
4121    Input Parameters:
4122 +  comm - MPI communicator
4123 .  m - number of local rows (Cannot be PETSC_DECIDE)
4124 .  n - This value should be the same as the local size used in creating the
4125        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4126        calculated if N is given) For square matrices n is almost always m.
4127 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4128 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4129 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4130 .   j - column indices
4131 -   a - optional matrix values
4132 
4133    Output Parameter:
4134 .   mat - the matrix
4135 
4136    Level: intermediate
4137 
4138    Notes:
4139        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4140      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4141      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4142 
4143        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4144 
4145        The format which is used for the sparse matrix input, is equivalent to a
4146     row-major ordering.. i.e for the following matrix, the input data expected is
4147     as shown
4148 
4149        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4150 
4151 $        1 0 0
4152 $        2 0 3     P0
4153 $       -------
4154 $        4 5 6     P1
4155 $
4156 $     Process0 [P0]: rows_owned=[0,1]
4157 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4158 $        j =  {0,0,2}  [size = 3]
4159 $        v =  {1,2,3}  [size = 3]
4160 $
4161 $     Process1 [P1]: rows_owned=[2]
4162 $        i =  {0,3}    [size = nrow+1  = 1+1]
4163 $        j =  {0,1,2}  [size = 3]
4164 $        v =  {4,5,6}  [size = 3]
4165 
4166 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4167           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4168 @*/
4169 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4170 {
4171   PetscFunctionBegin;
4172   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4173   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4174   PetscCall(MatCreate(comm,mat));
4175   PetscCall(MatSetSizes(*mat,m,n,M,N));
4176   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4177   PetscCall(MatSetType(*mat,MATMPIAIJ));
4178   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4179   PetscFunctionReturn(0);
4180 }
4181 
4182 /*@
4183      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4184          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays()
4185 
4186      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4187 
4188    Collective
4189 
4190    Input Parameters:
4191 +  mat - the matrix
4192 .  m - number of local rows (Cannot be PETSC_DECIDE)
4193 .  n - This value should be the same as the local size used in creating the
4194        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4195        calculated if N is given) For square matrices n is almost always m.
4196 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4197 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4198 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4199 .  J - column indices
4200 -  v - matrix values
4201 
4202    Level: intermediate
4203 
4204 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4205           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4206 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       nnz,i;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  PetscScalar    *ad,*ao;
  PetscInt       ldi,Iii,md;
  const PetscInt *Adi = Ad->i;   /* row offsets of the diagonal block A */
  PetscInt       *ld = Aij->ld;  /* per-row count of entries below the diagonal block;
                                    assumes mat came from MatCreateMPIAIJWithArrays()/
                                    MatMPIAIJSetPreallocationCSR() which fill it in */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));

  /* v stores each row as [off-diag left of A | diag block A | off-diag right of A];
     scatter those pieces into the value arrays of A (ad) and B (ao) */
  for (i=0; i<m; i++) {
    nnz  = Ii[i+1]- Ii[i];   /* total entries in this row */
    Iii  = Ii[i];            /* start of this row in v */
    ldi  = ld[i];            /* entries preceding the diagonal block */
    md   = Adi[i+1]-Adi[i];  /* entries inside the diagonal block */
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad  += md;
    ao  += nnz - md;
  }
  /* values were written directly into local storage: skip off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  /* bump object states so cached data (e.g. norms) is recomputed */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4250 
4251 /*@
4252      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4253 
4254    Collective
4255 
4256    Input Parameters:
4257 +  mat - the matrix
4258 -  v - matrix values, stored by row
4259 
4260    Level: intermediate
4261 
4262    Notes:
4263    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4264 
4265 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4267 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
{
  PetscInt       nnz,i,m;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad,*ao;
  const PetscInt *Adi = Ad->i,*Adj = Ao->i;  /* NB: despite its name, Adj is Ao->i, the row
                                                offsets of the off-diagonal block B */
  PetscInt       ldi,Iii,md;
  PetscInt       *ld = Aij->ld;  /* per-row count of entries below the diagonal block;
                                    assumes mat came from MatCreateMPIAIJWithArrays()/
                                    MatMPIAIJSetPreallocationCSR() which fill it in */

  PetscFunctionBegin;
  m = mat->rmap->n;  /* number of local rows */

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
  Iii = 0;  /* running offset into v; row lengths are reconstructed from the stored CSR offsets */
  for (i=0; i<m; i++) {
    nnz  = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];  /* total entries in this row (diag + off-diag) */
    ldi  = ld[i];                              /* entries preceding the diagonal block */
    md   = Adi[i+1]-Adi[i];                    /* entries inside the diagonal block */
    /* v stores each row as [off-diag left | diag block | off-diag right];
       scatter those pieces into the value arrays of A (ad) and B (ao) */
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad  += md;
    ao  += nnz - md;
    Iii += nnz;
  }
  /* values were written directly into local storage: skip off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  /* bump object states so cached data (e.g. norms) is recomputed */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4309 
4310 /*@C
4311    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4312    (the default parallel PETSc format).  For good matrix assembly performance
4313    the user should preallocate the matrix storage by setting the parameters
4314    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4315    performance can be increased by more than a factor of 50.
4316 
4317    Collective
4318 
4319    Input Parameters:
4320 +  comm - MPI communicator
4321 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4322            This value should be the same as the local size used in creating the
4323            y vector for the matrix-vector product y = Ax.
4324 .  n - This value should be the same as the local size used in creating the
4325        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4326        calculated if N is given) For square matrices n is almost always m.
4327 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4328 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4329 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4330            (same value is used for all local rows)
4331 .  d_nnz - array containing the number of nonzeros in the various rows of the
4332            DIAGONAL portion of the local submatrix (possibly different for each row)
4333            or NULL, if d_nz is used to specify the nonzero structure.
4334            The size of this array is equal to the number of local rows, i.e 'm'.
4335 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4336            submatrix (same value is used for all local rows).
4337 -  o_nnz - array containing the number of nonzeros in the various rows of the
4338            OFF-DIAGONAL portion of the local submatrix (possibly different for
4339            each row) or NULL, if o_nz is used to specify the nonzero
4340            structure. The size of this array is equal to the number
4341            of local rows, i.e 'm'.
4342 
4343    Output Parameter:
4344 .  A - the matrix
4345 
4346    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4347    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4348    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4349 
4350    Notes:
4351    If the *_nnz parameter is given then the *_nz parameter is ignored
4352 
4353    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4354    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4355    storage requirements for this matrix.
4356 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4360 
4361    The user MUST specify either the local or global matrix dimensions
4362    (possibly both).
4363 
4364    The parallel matrix is partitioned across processors such that the
4365    first m0 rows belong to process 0, the next m1 rows belong to
4366    process 1, the next m2 rows belong to process 2 etc.. where
4367    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4368    values corresponding to [m x N] submatrix.
4369 
4370    The columns are logically partitioned with the n0 columns belonging
4371    to 0th partition, the next n1 columns belonging to the next
4372    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4373 
4374    The DIAGONAL portion of the local submatrix on any given processor
4375    is the submatrix corresponding to the rows and columns m,n
4376    corresponding to the given processor. i.e diagonal matrix on
4377    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4378    etc. The remaining portion of the local submatrix [m x (N-n)]
4379    constitute the OFF-DIAGONAL portion. The example below better
4380    illustrates this concept.
4381 
4382    For a square global matrix we define each processor's diagonal portion
4383    to be its local rows and the corresponding columns (a square submatrix);
4384    each processor's off-diagonal portion encompasses the remainder of the
4385    local matrix (a rectangular submatrix).
4386 
4387    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4388 
4389    When calling this routine with a single process communicator, a matrix of
4390    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4391    type of communicator, use the construction mechanism
4392 .vb
4393      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4394 .ve
4395 
4396 $     MatCreate(...,&A);
4397 $     MatSetType(A,MATMPIAIJ);
4398 $     MatSetSizes(A, m,n,M,N);
4399 $     MatMPIAIJSetPreallocation(A,...);
4400 
4401    By default, this format uses inodes (identical nodes) when possible.
4402    We search for consecutive rows with the same nonzero structure, thereby
4403    reusing matrix information to achieve increased efficiency.
4404 
4405    Options Database Keys:
4406 +  -mat_no_inode  - Do not use inodes
4407 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4408 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4409         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4410         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4411 
4412    Example usage:
4413 
4414    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4416    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4417    as follows
4418 
4419 .vb
4420             1  2  0  |  0  3  0  |  0  4
4421     Proc0   0  5  6  |  7  0  0  |  8  0
4422             9  0 10  | 11  0  0  | 12  0
4423     -------------------------------------
4424            13  0 14  | 15 16 17  |  0  0
4425     Proc1   0 18  0  | 19 20 21  |  0  0
4426             0  0  0  | 22 23  0  | 24  0
4427     -------------------------------------
4428     Proc2  25 26 27  |  0  0 28  | 29  0
4429            30  0  0  | 31 32 33  |  0 34
4430 .ve
4431 
4432    This can be represented as a collection of submatrices as
4433 
4434 .vb
4435       A B C
4436       D E F
4437       G H I
4438 .ve
4439 
4440    Where the submatrices A,B,C are owned by proc0, D,E,F are
4441    owned by proc1, G,H,I are owned by proc2.
4442 
4443    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4444    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4445    The 'M','N' parameters are 8,8, and have the same values on all procs.
4446 
4447    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4448    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4449    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4453 
4454    When d_nz, o_nz parameters are specified, d_nz storage elements are
4455    allocated for every row of the local diagonal submatrix, and o_nz
4456    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4468    34 values.
4469 
4470    When d_nnz, o_nnz parameters are specified, the storage is specified
4471    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4472    In the above case the values for d_nnz,o_nnz are
4473 .vb
4474      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4475      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4476      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4477 .ve
4478    Here the space allocated is sum of all the above values i.e 34, and
4479    hence pre-allocation is perfect.
4480 
4481    Level: intermediate
4482 
4483 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4484           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4485 @*/
4486 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4487 {
4488   PetscMPIInt    size;
4489 
4490   PetscFunctionBegin;
4491   PetscCall(MatCreate(comm,A));
4492   PetscCall(MatSetSizes(*A,m,n,M,N));
4493   PetscCallMPI(MPI_Comm_size(comm,&size));
4494   if (size > 1) {
4495     PetscCall(MatSetType(*A,MATMPIAIJ));
4496     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4497   } else {
4498     PetscCall(MatSetType(*A,MATSEQAIJ));
4499     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4500   }
4501   PetscFunctionReturn(0);
4502 }
4503 
4504 /*@C
4505   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4506 
4507   Not collective
4508 
4509   Input Parameter:
4510 . A - The MPIAIJ matrix
4511 
4512   Output Parameters:
4513 + Ad - The local diagonal block as a SeqAIJ matrix
4514 . Ao - The local off-diagonal block as a SeqAIJ matrix
4515 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4516 
4517   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4518   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4519   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4520   local column numbers to global column numbers in the original matrix.
4521 
4522   Level: intermediate
4523 
4524 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4525 @*/
4526 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4527 {
4528   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4529   PetscBool      flg;
4530 
4531   PetscFunctionBegin;
4532   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4533   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4534   if (Ad)     *Ad     = a->A;
4535   if (Ao)     *Ao     = a->B;
4536   if (colmap) *colmap = a->garray;
4537   PetscFunctionReturn(0);
4538 }
4539 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Stacks the sequential matrices inmat
   owned by the processes of comm, one on top of the other in rank order, into a
   single parallel AIJ matrix.

   comm   - communicator for the output matrix
   inmat  - this process's sequential matrix; its rows become this process's
            contiguous block of rows in *outmat
   n      - local column count of *outmat (or PETSC_DECIDE); the sum over all
            processes must equal the global column count of inmat
   scall  - MAT_INITIAL_MATRIX creates *outmat (preallocation + values);
            MAT_REUSE_MATRIX only refills the values of an existing *outmat
   outmat - the resulting parallel matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first global row owned by this process: exclusive prefix sum of the local row counts */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row of inmat for preallocation */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocations are invoked; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    /* every process sets only its own rows, so off-process stashing can be skipped */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the owned rows of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4593 
4594 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4595 {
4596   PetscMPIInt       rank;
4597   PetscInt          m,N,i,rstart,nnz;
4598   size_t            len;
4599   const PetscInt    *indx;
4600   PetscViewer       out;
4601   char              *name;
4602   Mat               B;
4603   const PetscScalar *values;
4604 
4605   PetscFunctionBegin;
4606   PetscCall(MatGetLocalSize(A,&m,NULL));
4607   PetscCall(MatGetSize(A,NULL,&N));
4608   /* Should this be the type of the diagonal block of A? */
4609   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4610   PetscCall(MatSetSizes(B,m,N,m,N));
4611   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4612   PetscCall(MatSetType(B,MATSEQAIJ));
4613   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4614   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4615   for (i=0; i<m; i++) {
4616     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4617     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4618     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4619   }
4620   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4621   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4622 
4623   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4624   PetscCall(PetscStrlen(outfile,&len));
4625   PetscCall(PetscMalloc1(len+6,&name));
4626   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4627   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4628   PetscCall(PetscFree(name));
4629   PetscCall(MatView(B,out));
4630   PetscCall(PetscViewerDestroy(&out));
4631   PetscCall(MatDestroy(&B));
4632   PetscFunctionReturn(0);
4633 }
4634 
4635 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4636 {
4637   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4638 
4639   PetscFunctionBegin;
4640   if (!merge) PetscFunctionReturn(0);
4641   PetscCall(PetscFree(merge->id_r));
4642   PetscCall(PetscFree(merge->len_s));
4643   PetscCall(PetscFree(merge->len_r));
4644   PetscCall(PetscFree(merge->bi));
4645   PetscCall(PetscFree(merge->bj));
4646   PetscCall(PetscFree(merge->buf_ri[0]));
4647   PetscCall(PetscFree(merge->buf_ri));
4648   PetscCall(PetscFree(merge->buf_rj[0]));
4649   PetscCall(PetscFree(merge->buf_rj));
4650   PetscCall(PetscFree(merge->coi));
4651   PetscCall(PetscFree(merge->coj));
4652   PetscCall(PetscFree(merge->owners_co));
4653   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4654   PetscCall(PetscFree(merge));
4655   PetscFunctionReturn(0);
4656 }
4657 
4658 #include <../src/mat/utils/freespace.h>
4659 #include <petscbt.h>
4660 
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of MatCreateMPIAIJSumSeqAIJ():
   fills the values of mpimat (created by MatCreateMPIAIJSumSeqAIJSymbolic()) by
   summing, row by row, this process's entries of seqmat with the entries
   received from the other processes, then assembles mpimat.

   seqmat - this process's sequential matrix contribution
   mpimat - the parallel matrix produced by the symbolic phase; filled and
            assembled on return
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge support data attached by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;     /* merged row pointers computed by the symbolic phase */
  bj     = merge->bj;     /* merged column indices */
  buf_ri = merge->buf_ri; /* received i-structures, one per incoming message */
  buf_rj = merge->buf_rj; /* received j-structures */

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values for rows owned by [proc] are contiguous in seqmat starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;  /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge of two sorted index lists: j walks bj_i, nextaj walks aj */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] holds the contiguous receive buffer allocated by PetscPostIrecvScalar() */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4780 
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ():
   determines the nonzero structure of the parallel sum of the processes' seqmat
   matrices, creates the (unassembled) parallel matrix, and attaches the merge
   support data needed by MatCreateMPIAIJSumSeqAIJNumeric() in a container named
   "MatMergeSeqsToMPI".

   comm   - communicator for the parallel matrix
   seqmat - this process's sequential matrix (same global dimensions on every process)
   m      - number of local rows of the output (or PETSC_DECIDE)
   n      - number of local columns of the output (or PETSC_DECIDE)
   mpimat - the symbolic parallel matrix; not assembled until the numeric phase runs
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;      /* local row count after layout setup */
  owners = merge->rowmap->range;  /* global row ranges owned by each process */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* nothing is sent to self; local rows are merged directly below */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only rows with at least one nonzero; empty rows are not sent */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1); /* nrows header + nrows row indices + (nrows+1) offsets */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
5029 
5030 /*@C
5031       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5032                  matrices from each processor
5033 
5034     Collective
5035 
5036    Input Parameters:
5037 +    comm - the communicators the parallel matrix will live on
5038 .    seqmat - the input sequential matrices
5039 .    m - number of local rows (or PETSC_DECIDE)
5040 .    n - number of local columns (or PETSC_DECIDE)
5041 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5042 
5043    Output Parameter:
5044 .    mpimat - the parallel matrix generated
5045 
5046     Level: advanced
5047 
   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The supporting data for the merge is kept in a PetscContainer composed with
     mpimat under the name "MatMergeSeqsToMPI" and is freed when mpimat is
     destroyed; call PetscObjectQuery() to access it.
5052 @*/
5053 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5054 {
5055   PetscMPIInt    size;
5056 
5057   PetscFunctionBegin;
5058   PetscCallMPI(MPI_Comm_size(comm,&size));
5059   if (size == 1) {
5060     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5061     if (scall == MAT_INITIAL_MATRIX) {
5062       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
5063     } else {
5064       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
5065     }
5066     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5067     PetscFunctionReturn(0);
5068   }
5069   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5070   if (scall == MAT_INITIAL_MATRIX) {
5071     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
5072   }
5073   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
5074   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5075   PetscFunctionReturn(0);
5076 }
5077 
5078 /*@
5079      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5080           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5081           with MatGetSize()
5082 
5083     Not Collective
5084 
   Input Parameter:
.    A - the matrix
5088 
5089    Output Parameter:
5090 .    A_loc - the local sequential matrix generated
5091 
5092     Level: developer
5093 
5094    Notes:
5095      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5096 
5097      Destroy the matrix with MatDestroy()
5098 
5099 .seealso: MatMPIAIJGetLocalMat()
5100 
5101 @*/
5102 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5103 {
5104   PetscBool      mpi;
5105 
5106   PetscFunctionBegin;
5107   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5108   if (mpi) {
5109     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5110   } else {
5111     *A_loc = A;
5112     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5113   }
5114   PetscFunctionReturn(0);
5115 }
5116 
5117 /*@
5118      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5119           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5120           with MatGetSize()
5121 
5122     Not Collective
5123 
5124    Input Parameters:
5125 +    A - the matrix
5126 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5127 
5128    Output Parameter:
5129 .    A_loc - the local sequential matrix generated
5130 
5131     Level: developer
5132 
5133    Notes:
5134      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5135 
5136      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5137      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5138      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5139      modify the values of the returned A_loc.
5140 
5141 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5142 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray; /* garray: local off-diagonal column -> global column */
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Accept any type whose name begins with "mpiaij" (e.g. mpiaijcusparse), not only plain MATMPIAIJ */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block already is the whole local matrix (see the manual page Notes) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data; /* diagonal block */
  b = (Mat_SeqAIJ*)(mpimat->B)->data; /* off-diagonal block */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  /* aav/bav keep the pointers handed out by the accessors for the restore calls below;
     aa/ba (and aj/bj) are cursors advanced while walking the CSR data */
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointer of the merged matrix: diagonal plus off-diagonal nonzeros per row */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A whose global column lies left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column indices to global numbering) */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure is unchanged: refill only the values, in the same interleaved order as above */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A preceding the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A following the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5243 
5244 /*@
5245      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5246           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5247 
5248     Not Collective
5249 
5250    Input Parameters:
5251 +    A - the matrix
5252 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5253 
5254    Output Parameters:
5255 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5256 -    A_loc - the local sequential matrix generated
5257 
5258     Level: developer
5259 
5260    Notes:
5261      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5262 
5263 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5264 
5265 @*/
5266 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5267 {
5268   Mat            Ao,Ad;
5269   const PetscInt *cmap;
5270   PetscMPIInt    size;
5271   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5272 
5273   PetscFunctionBegin;
5274   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5275   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5276   if (size == 1) {
5277     if (scall == MAT_INITIAL_MATRIX) {
5278       PetscCall(PetscObjectReference((PetscObject)Ad));
5279       *A_loc = Ad;
5280     } else if (scall == MAT_REUSE_MATRIX) {
5281       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5282     }
5283     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5284     PetscFunctionReturn(0);
5285   }
5286   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5287   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5288   if (f) {
5289     PetscCall((*f)(A,scall,glob,A_loc));
5290   } else {
5291     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5292     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5293     Mat_SeqAIJ        *c;
5294     PetscInt          *ai = a->i, *aj = a->j;
5295     PetscInt          *bi = b->i, *bj = b->j;
5296     PetscInt          *ci,*cj;
5297     const PetscScalar *aa,*ba;
5298     PetscScalar       *ca;
5299     PetscInt          i,j,am,dn,on;
5300 
5301     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5302     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5303     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5304     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5305     if (scall == MAT_INITIAL_MATRIX) {
5306       PetscInt k;
5307       PetscCall(PetscMalloc1(1+am,&ci));
5308       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5309       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5310       ci[0] = 0;
5311       for (i=0,k=0; i<am; i++) {
5312         const PetscInt ncols_o = bi[i+1] - bi[i];
5313         const PetscInt ncols_d = ai[i+1] - ai[i];
5314         ci[i+1] = ci[i] + ncols_o + ncols_d;
5315         /* diagonal portion of A */
5316         for (j=0; j<ncols_d; j++,k++) {
5317           cj[k] = *aj++;
5318           ca[k] = *aa++;
5319         }
5320         /* off-diagonal portion of A */
5321         for (j=0; j<ncols_o; j++,k++) {
5322           cj[k] = dn + *bj++;
5323           ca[k] = *ba++;
5324         }
5325       }
5326       /* put together the new matrix */
5327       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5328       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5329       /* Since these are PETSc arrays, change flags to free them as necessary. */
5330       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5331       c->free_a  = PETSC_TRUE;
5332       c->free_ij = PETSC_TRUE;
5333       c->nonew   = 0;
5334       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5335     } else if (scall == MAT_REUSE_MATRIX) {
5336       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5337       for (i=0; i<am; i++) {
5338         const PetscInt ncols_d = ai[i+1] - ai[i];
5339         const PetscInt ncols_o = bi[i+1] - bi[i];
5340         /* diagonal portion of A */
5341         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5342         /* off-diagonal portion of A */
5343         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5344       }
5345       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5346     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5347     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5348     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5349     if (glob) {
5350       PetscInt cst, *gidx;
5351 
5352       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5353       PetscCall(PetscMalloc1(dn+on,&gidx));
5354       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5355       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5356       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5357     }
5358   }
5359   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5360   PetscFunctionReturn(0);
5361 }
5362 
5363 /*@C
5364      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5365 
5366     Not Collective
5367 
5368    Input Parameters:
5369 +    A - the matrix
5370 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5371 -    row, col - index sets of rows and columns to extract (or NULL)
5372 
5373    Output Parameter:
5374 .    A_loc - the local sequential matrix generated
5375 
5376     Level: developer
5377 
5378 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5379 
5380 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: every column with a local nonzero, in increasing global order:
       off-diagonal columns left of the diagonal block, the owned columns, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray; /* sorted global indices of the off-diagonal columns */
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column at or beyond the owned range */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    /* the compose above holds a reference, so the IS survives with the matrix */
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}
5436 
5437 /*
5438  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5439  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5440  * on a global size.
5441  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* per owned row, record diag/off-diag nonzero counts and running offsets (interleaved pairs) */
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol); /* ncol only needs to bound the widest row */
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* Build entry-level SF graphs: one leaf per received nonzero, interleaving diag
     and off-diag entries of each row into the single contiguous storage of P_oth */
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (temporarily mutates pd->j; undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* undo the local-to-global conversion of po->j done above */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5610 
5611 /*
5612  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5613  * This supports MPIAIJ and MAIJ
5614  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (dof consecutive columns map to one key) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* Extract the unique keys and sort them to obtain the row indices of P to fetch */
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    /* keep the off-diagonal-column -> P_oth-row map for later lookups */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5688 
5689 /*@C
5690   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5691 
5692   Collective on Mat
5693 
5694   Input Parameters:
5695 + A - the first matrix in mpiaij format
5696 . B - the second matrix in mpiaij format
5697 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5698 
5699   Output Parameters:
5700 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5701 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5702 - B_seq - the sequential matrix generated
5703 
5704   Level: developer
5705 
5706 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B only makes sense when A's column layout matches B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Rows of B to fetch = nonzero columns of local A, in increasing global order */
    start = A->cmap->rstart;
    cmap  = a->garray; /* sorted global indices of A's off-diagonal columns */
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    /* take all columns of B */
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* hand the index sets back to the caller (for reuse) or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}
5758 
5759 /*
5760     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5761     of the OFF-DIAGONAL portion of local A
5762 
5763     Collective on Mat
5764 
5765    Input Parameters:
5766 +    A,B - the matrices in mpiaij format
5767 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5768 
   Output Parameters:
5770 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5771 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5772 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5773 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5774 
5775     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5777 
5778     Level: developer
5779 
5780 */
5781 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5782 {
5783   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5784   Mat_SeqAIJ             *b_oth;
5785   VecScatter             ctx;
5786   MPI_Comm               comm;
5787   const PetscMPIInt      *rprocs,*sprocs;
5788   const PetscInt         *srow,*rstarts,*sstarts;
5789   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5790   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5791   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5792   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5793   PetscMPIInt            size,tag,rank,nreqs;
5794 
5795   PetscFunctionBegin;
5796   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5797   PetscCallMPI(MPI_Comm_size(comm,&size));
5798 
5799   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5800     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5801   }
5802   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5803   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5804 
5805   if (size == 1) {
5806     startsj_s = NULL;
5807     bufa_ptr  = NULL;
5808     *B_oth    = NULL;
5809     PetscFunctionReturn(0);
5810   }
5811 
5812   ctx = a->Mvctx;
5813   tag = ((PetscObject)ctx)->tag;
5814 
5815   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5816   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5817   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5818   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5819   PetscCall(PetscMalloc1(nreqs,&reqs));
5820   rwaits = reqs;
5821   swaits = reqs + nrecvs;
5822 
5823   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5824   if (scall == MAT_INITIAL_MATRIX) {
5825     /* i-array */
5826     /*---------*/
5827     /*  post receives */
5828     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5829     for (i=0; i<nrecvs; i++) {
5830       rowlen = rvalues + rstarts[i]*rbs;
5831       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5832       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5833     }
5834 
5835     /* pack the outgoing message */
5836     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5837 
5838     sstartsj[0] = 0;
5839     rstartsj[0] = 0;
5840     len         = 0; /* total length of j or a array to be sent */
5841     if (nsends) {
5842       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5843       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5844     }
5845     for (i=0; i<nsends; i++) {
5846       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5847       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5848       for (j=0; j<nrows; j++) {
5849         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5850         for (l=0; l<sbs; l++) {
5851           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5852 
5853           rowlen[j*sbs+l] = ncols;
5854 
5855           len += ncols;
5856           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5857         }
5858         k++;
5859       }
5860       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5861 
5862       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5863     }
5864     /* recvs and sends of i-array are completed */
5865     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5866     PetscCall(PetscFree(svalues));
5867 
5868     /* allocate buffers for sending j and a arrays */
5869     PetscCall(PetscMalloc1(len+1,&bufj));
5870     PetscCall(PetscMalloc1(len+1,&bufa));
5871 
5872     /* create i-array of B_oth */
5873     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5874 
5875     b_othi[0] = 0;
5876     len       = 0; /* total length of j or a array to be received */
5877     k         = 0;
5878     for (i=0; i<nrecvs; i++) {
5879       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5880       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5881       for (j=0; j<nrows; j++) {
5882         b_othi[k+1] = b_othi[k] + rowlen[j];
5883         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5884         k++;
5885       }
5886       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5887     }
5888     PetscCall(PetscFree(rvalues));
5889 
5890     /* allocate space for j and a arrays of B_oth */
5891     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5892     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5893 
5894     /* j-array */
5895     /*---------*/
5896     /*  post receives of j-array */
5897     for (i=0; i<nrecvs; i++) {
5898       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5899       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5900     }
5901 
5902     /* pack the outgoing message j-array */
5903     if (nsends) k = sstarts[0];
5904     for (i=0; i<nsends; i++) {
5905       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5906       bufJ  = bufj+sstartsj[i];
5907       for (j=0; j<nrows; j++) {
5908         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5909         for (ll=0; ll<sbs; ll++) {
5910           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5911           for (l=0; l<ncols; l++) {
5912             *bufJ++ = cols[l];
5913           }
5914           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5915         }
5916       }
5917       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5918     }
5919 
5920     /* recvs and sends of j-array are completed */
5921     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5922   } else if (scall == MAT_REUSE_MATRIX) {
5923     sstartsj = *startsj_s;
5924     rstartsj = *startsj_r;
5925     bufa     = *bufa_ptr;
5926     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5927     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5928   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5929 
5930   /* a-array */
5931   /*---------*/
5932   /*  post receives of a-array */
5933   for (i=0; i<nrecvs; i++) {
5934     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5935     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5936   }
5937 
5938   /* pack the outgoing message a-array */
5939   if (nsends) k = sstarts[0];
5940   for (i=0; i<nsends; i++) {
5941     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5942     bufA  = bufa+sstartsj[i];
5943     for (j=0; j<nrows; j++) {
5944       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5945       for (ll=0; ll<sbs; ll++) {
5946         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5947         for (l=0; l<ncols; l++) {
5948           *bufA++ = vals[l];
5949         }
5950         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5951       }
5952     }
5953     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5954   }
5955   /* recvs and sends of a-array are completed */
5956   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5957   PetscCall(PetscFree(reqs));
5958 
5959   if (scall == MAT_INITIAL_MATRIX) {
5960     /* put together the new matrix */
5961     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5962 
5963     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5964     /* Since these are PETSc arrays, change flags to free them as necessary. */
5965     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5966     b_oth->free_a  = PETSC_TRUE;
5967     b_oth->free_ij = PETSC_TRUE;
5968     b_oth->nonew   = 0;
5969 
5970     PetscCall(PetscFree(bufj));
5971     if (!startsj_s || !bufa_ptr) {
5972       PetscCall(PetscFree2(sstartsj,rstartsj));
5973       PetscCall(PetscFree(bufa_ptr));
5974     } else {
5975       *startsj_s = sstartsj;
5976       *startsj_r = rstartsj;
5977       *bufa_ptr  = bufa;
5978     }
5979   } else if (scall == MAT_REUSE_MATRIX) {
5980     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5981   }
5982 
5983   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5984   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5985   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5986   PetscFunctionReturn(0);
5987 }
5988 
5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5992 #if defined(PETSC_HAVE_MKL_SPARSE)
5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5994 #endif
5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5997 #if defined(PETSC_HAVE_ELEMENTAL)
5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5999 #endif
6000 #if defined(PETSC_HAVE_SCALAPACK)
6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
6002 #endif
6003 #if defined(PETSC_HAVE_HYPRE)
6004 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
6005 #endif
6006 #if defined(PETSC_HAVE_CUDA)
6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6008 #endif
6009 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6011 #endif
6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6013 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6014 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6015 
6016 /*
6017     Computes (B'*A')' since computing B*A directly is untenable
6018 
6019                n                       p                          p
6020         [             ]       [             ]         [                 ]
6021       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6022         [             ]       [             ]         [                 ]
6023 
6024 */
6025 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6026 {
6027   Mat            At,Bt,Ct;
6028 
6029   PetscFunctionBegin;
6030   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
6031   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
6032   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
6033   PetscCall(MatDestroy(&At));
6034   PetscCall(MatDestroy(&Bt));
6035   PetscCall(MatTransposeSetPrecursor(Ct,C));
6036   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
6037   PetscCall(MatDestroy(&Ct));
6038   PetscFunctionReturn(0);
6039 }
6040 
6041 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6042 {
6043   PetscBool      cisdense;
6044 
6045   PetscFunctionBegin;
6046   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6047   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
6048   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6049   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
6050   if (!cisdense) {
6051     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6052   }
6053   PetscCall(MatSetUp(C));
6054 
6055   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6056   PetscFunctionReturn(0);
6057 }
6058 
6059 /* ----------------------------------------------------------------*/
6060 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6061 {
6062   Mat_Product *product = C->product;
6063   Mat         A = product->A,B=product->B;
6064 
6065   PetscFunctionBegin;
6066   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6067     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6068 
6069   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6070   C->ops->productsymbolic = MatProductSymbolic_AB;
6071   PetscFunctionReturn(0);
6072 }
6073 
6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6075 {
6076   Mat_Product    *product = C->product;
6077 
6078   PetscFunctionBegin;
6079   if (product->type == MATPRODUCT_AB) {
6080     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6081   }
6082   PetscFunctionReturn(0);
6083 }
6084 
6085 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6086 
6087   Input Parameters:
6088 
6089     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6090     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6091 
6092     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6093 
6094     For Set1, j1[] contains column indices of the nonzeros.
6095     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6097     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6098 
6099     Similar for Set2.
6100 
6101     This routine merges the two sets of nonzeros row by row and removes repeats.
6102 
6103   Output Parameters: (memory is allocated by the caller)
6104 
6105     i[],j[]: the CSR of the merged matrix, which has m rows.
6106     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6107     imap2[]: similar to imap1[], but for Set2.
6108     Note we order nonzeros row-by-row and from left to right.
6109 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* Row index of mat and its number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2; /* Cursors: t* count unique nonzeros, b*/e* bound the current row in j1[]/j2[] */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-way merge over the sorted column indices of row r; repeats are skipped
       by jumping jmap[t+1]-jmap[t] positions at a time, i.e., one whole run per unique nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Next unique nonzero comes from Set1 only */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Next unique nonzero comes from Set2 only */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: total unique nonzeros emitted after finishing row r */
  }
  PetscFunctionReturn(0);
}
6163 
6164 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6165 
6166   Input Parameters:
6167     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6168     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6169       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6170 
6171       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6172       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6173 
6174   Output Parameters:
6175     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6176     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6177       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6178       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6179 
6180     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6181       Atot: number of entries belonging to the diagonal block.
6182       Annz: number of unique nonzeros belonging to the diagonal block.
6183       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6184         repeats (i.e., same 'i,j' pair).
6185       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6186         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6190 
6191     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6192 
6193     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6194 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart; /* number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); /* NOTE(review): this allows j[p] == N though valid global columns are [0,N) -- confirm intended bound */
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); /* sort the row; shifted (diag) indices are negative so they sort first */
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col); /* Skip over a run of repeats of the same column */
      Bnnz++;
    }
    k = s; /* advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot,&Aperm));
  PetscCall(PetscMalloc1(Btot,&Bperm));
  PetscCall(PetscMalloc1(Annz+1,&Ajmap));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets for the second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6294 
6295 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6296 
6297   Input Parameters:
6298     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6299     nnz:  number of unique nonzeros in the merged matrix
6300     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6302 
6303   Output Parameter: (memory is allocated by the caller)
6304     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6305 
6306   Example:
6307     nnz1 = 4
6308     nnz  = 6
6309     imap = [1,3,4,5]
6310     jmap = [0,3,5,6,7]
6311    then,
6312     jmap_new = [0,0,3,3,5,6,7]
6313 */
6314 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6315 {
6316   PetscCount k,p;
6317 
6318   PetscFunctionBegin;
6319   jmap_new[0] = 0;
6320   p = nnz; /* p loops over jmap_new[] backwards */
6321   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6322     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6323   }
6324   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6325   PetscFunctionReturn(0);
6326 }
6327 
6328 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6329 {
6330   MPI_Comm                  comm;
6331   PetscMPIInt               rank,size;
6332   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6333   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6334   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6335 
6336   PetscFunctionBegin;
6337   PetscCall(PetscFree(mpiaij->garray));
6338   PetscCall(VecDestroy(&mpiaij->lvec));
6339 #if defined(PETSC_USE_CTABLE)
6340   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6341 #else
6342   PetscCall(PetscFree(mpiaij->colmap));
6343 #endif
6344   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6345   mat->assembled = PETSC_FALSE;
6346   mat->was_assembled = PETSC_FALSE;
6347   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6348 
6349   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6350   PetscCallMPI(MPI_Comm_size(comm,&size));
6351   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6352   PetscCall(PetscLayoutSetUp(mat->rmap));
6353   PetscCall(PetscLayoutSetUp(mat->cmap));
6354   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6355   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6356   PetscCall(MatGetLocalSize(mat,&m,&n));
6357   PetscCall(MatGetSize(mat,&M,&N));
6358 
6359   /* ---------------------------------------------------------------------------*/
6360   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6361   /* entries come first, then local rows, then remote rows.                     */
6362   /* ---------------------------------------------------------------------------*/
6363   PetscCount n1 = coo_n,*perm1;
6364   PetscInt   *i1 = coo_i,*j1 = coo_j;
6365 
6366   PetscCall(PetscMalloc1(n1,&perm1));
6367   for (k=0; k<n1; k++) perm1[k] = k;
6368 
6369   /* Manipulate indices so that entries with negative row or col indices will have smallest
6370      row indices, local entries will have greater but negative row indices, and remote entries
6371      will have positive row indices.
6372   */
6373   for (k=0; k<n1; k++) {
6374     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6375     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6376     else {
6377       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6378       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6379     }
6380   }
6381 
6382   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6383   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6384   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6385   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6386   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6387 
6388   /* ---------------------------------------------------------------------------*/
6389   /*           Split local rows into diag/offdiag portions                      */
6390   /* ---------------------------------------------------------------------------*/
6391   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6392   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6393   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6394 
6395   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6396   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6397   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6398 
6399   /* ---------------------------------------------------------------------------*/
6400   /*           Send remote rows to their owner                                  */
6401   /* ---------------------------------------------------------------------------*/
6402   /* Find which rows should be sent to which remote ranks*/
6403   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6404   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6405   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6406   const PetscInt *ranges;
6407   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6408 
6409   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6410   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6411   for (k=rem; k<n1;) {
6412     PetscMPIInt  owner;
6413     PetscInt     firstRow,lastRow;
6414 
6415     /* Locate a row range */
6416     firstRow = i1[k]; /* first row of this owner */
6417     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6418     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6419 
6420     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6421     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6422 
6423     /* All entries in [k,p) belong to this remote owner */
6424     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6425       PetscMPIInt *sendto2;
6426       PetscInt    *nentries2;
6427       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6428 
6429       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6430       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6431       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6432       PetscCall(PetscFree2(sendto,nentries2));
6433       sendto      = sendto2;
6434       nentries    = nentries2;
6435       maxNsend    = maxNsend2;
6436     }
6437     sendto[nsend]   = owner;
6438     nentries[nsend] = p - k;
6439     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6440     nsend++;
6441     k = p;
6442   }
6443 
6444   /* Build 1st SF to know offsets on remote to send data */
6445   PetscSF     sf1;
6446   PetscInt    nroots = 1,nroots2 = 0;
6447   PetscInt    nleaves = nsend,nleaves2 = 0;
6448   PetscInt    *offsets;
6449   PetscSFNode *iremote;
6450 
6451   PetscCall(PetscSFCreate(comm,&sf1));
6452   PetscCall(PetscMalloc1(nsend,&iremote));
6453   PetscCall(PetscMalloc1(nsend,&offsets));
6454   for (k=0; k<nsend; k++) {
6455     iremote[k].rank  = sendto[k];
6456     iremote[k].index = 0;
6457     nleaves2        += nentries[k];
6458     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6459   }
6460   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6461   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6462   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6463   PetscCall(PetscSFDestroy(&sf1));
6464   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6465 
6466   /* Build 2nd SF to send remote COOs to their owner */
6467   PetscSF sf2;
6468   nroots  = nroots2;
6469   nleaves = nleaves2;
6470   PetscCall(PetscSFCreate(comm,&sf2));
6471   PetscCall(PetscSFSetFromOptions(sf2));
6472   PetscCall(PetscMalloc1(nleaves,&iremote));
6473   p       = 0;
6474   for (k=0; k<nsend; k++) {
6475     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6476     for (q=0; q<nentries[k]; q++,p++) {
6477       iremote[p].rank  = sendto[k];
6478       iremote[p].index = offsets[k] + q;
6479     }
6480   }
6481   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6482 
6483   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6484   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6485 
6486   /* Send the remote COOs to their owner */
6487   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6488   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6489   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6490   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6491   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6492   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6493   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6494 
6495   PetscCall(PetscFree(offsets));
6496   PetscCall(PetscFree2(sendto,nentries));
6497 
6498   /* ---------------------------------------------------------------*/
6499   /* Sort received COOs by row along with the permutation array     */
6500   /* ---------------------------------------------------------------*/
6501   for (k=0; k<n2; k++) perm2[k] = k;
6502   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6503 
6504   /* ---------------------------------------------------------------*/
6505   /* Split received COOs into diag/offdiag portions                 */
6506   /* ---------------------------------------------------------------*/
6507   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6508   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6509   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6510 
6511   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6512   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6513 
6514   /* --------------------------------------------------------------------------*/
6515   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6516   /* --------------------------------------------------------------------------*/
6517   PetscInt   *Ai,*Bi;
6518   PetscInt   *Aj,*Bj;
6519 
6520   PetscCall(PetscMalloc1(m+1,&Ai));
6521   PetscCall(PetscMalloc1(m+1,&Bi));
6522   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6523   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6524 
6525   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6526   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6527   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6528   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6529   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6530 
6531   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6532   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6533 
6534   /* --------------------------------------------------------------------------*/
6535   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6536   /* expect nonzeros in A/B most likely have local contributing entries        */
6537   /* --------------------------------------------------------------------------*/
6538   PetscInt Annz = Ai[m];
6539   PetscInt Bnnz = Bi[m];
6540   PetscCount *Ajmap1_new,*Bjmap1_new;
6541 
6542   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6543   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6544 
6545   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6546   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6547 
6548   PetscCall(PetscFree(Aimap1));
6549   PetscCall(PetscFree(Ajmap1));
6550   PetscCall(PetscFree(Bimap1));
6551   PetscCall(PetscFree(Bjmap1));
6552   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6553   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6554   PetscCall(PetscFree(perm1));
6555   PetscCall(PetscFree3(i2,j2,perm2));
6556 
6557   Ajmap1 = Ajmap1_new;
6558   Bjmap1 = Bjmap1_new;
6559 
6560   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6561   if (Annz < Annz1 + Annz2) {
6562     PetscInt *Aj_new;
6563     PetscCall(PetscMalloc1(Annz,&Aj_new));
6564     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6565     PetscCall(PetscFree(Aj));
6566     Aj   = Aj_new;
6567   }
6568 
6569   if (Bnnz < Bnnz1 + Bnnz2) {
6570     PetscInt *Bj_new;
6571     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6572     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6573     PetscCall(PetscFree(Bj));
6574     Bj   = Bj_new;
6575   }
6576 
6577   /* --------------------------------------------------------------------------------*/
6578   /* Create new submatrices for on-process and off-process coupling                  */
6579   /* --------------------------------------------------------------------------------*/
6580   PetscScalar   *Aa,*Ba;
6581   MatType       rtype;
6582   Mat_SeqAIJ    *a,*b;
6583   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6584   PetscCall(PetscCalloc1(Bnnz,&Ba));
6585   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6586   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6587   PetscCall(MatDestroy(&mpiaij->A));
6588   PetscCall(MatDestroy(&mpiaij->B));
6589   PetscCall(MatGetRootType_Private(mat,&rtype));
6590   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6591   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6592   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6593 
6594   a = (Mat_SeqAIJ*)mpiaij->A->data;
6595   b = (Mat_SeqAIJ*)mpiaij->B->data;
6596   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6597   a->free_a       = b->free_a       = PETSC_TRUE;
6598   a->free_ij      = b->free_ij      = PETSC_TRUE;
6599 
6600   /* conversion must happen AFTER multiply setup */
6601   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6602   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6603   PetscCall(VecDestroy(&mpiaij->lvec));
6604   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6605   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6606 
6607   mpiaij->coo_n   = coo_n;
6608   mpiaij->coo_sf  = sf2;
6609   mpiaij->sendlen = nleaves;
6610   mpiaij->recvlen = nroots;
6611 
6612   mpiaij->Annz    = Annz;
6613   mpiaij->Bnnz    = Bnnz;
6614 
6615   mpiaij->Annz2   = Annz2;
6616   mpiaij->Bnnz2   = Bnnz2;
6617 
6618   mpiaij->Atot1   = Atot1;
6619   mpiaij->Atot2   = Atot2;
6620   mpiaij->Btot1   = Btot1;
6621   mpiaij->Btot2   = Btot2;
6622 
6623   mpiaij->Ajmap1  = Ajmap1;
6624   mpiaij->Aperm1  = Aperm1;
6625 
6626   mpiaij->Bjmap1  = Bjmap1;
6627   mpiaij->Bperm1  = Bperm1;
6628 
6629   mpiaij->Aimap2  = Aimap2;
6630   mpiaij->Ajmap2  = Ajmap2;
6631   mpiaij->Aperm2  = Aperm2;
6632 
6633   mpiaij->Bimap2  = Bimap2;
6634   mpiaij->Bjmap2  = Bjmap2;
6635   mpiaij->Bperm2  = Bperm2;
6636 
6637   mpiaij->Cperm1  = Cperm1;
6638 
6639   /* Allocate in preallocation. If not used, it has zero cost on host */
6640   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6641   PetscFunctionReturn(0);
6642 }
6643 
/* MatSetValuesCOO_MPIAIJ - Fill a MATMPIAIJ matrix with values for the COO entries
   registered earlier with MatSetPreallocationCOO_MPIAIJ().

   Input Parameters:
+  mat   - the matrix
.  v     - values, one per COO entry, in the order the (i,j) indices were given at preallocation
-  imode - INSERT_VALUES to overwrite existing matrix values, otherwise sums are added to them

   Off-process entries are packed into sendbuf via the Cperm1 permutation and pushed to their
   owners through the SF built at preallocation time; the communication is overlapped with the
   summation of the purely local entries into the diagonal (A) and off-diagonal (B) blocks.
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  /* jmap/perm/imap arrays were computed at preallocation: suffix 1 refers to local COO
     entries, suffix 2 to entries received from other ranks */
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i=0; i<Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] select the nonzero that
     the i-th received run (delimited by Ajmap2/Bjmap2) folds into */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6690 
6691 /* ----------------------------------------------------------------*/
6692 
6693 /*MC
6694    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6695 
6696    Options Database Keys:
6697 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6698 
6699    Level: beginner
6700 
6701    Notes:
6702     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6703     in this case the values associated with the rows and columns one passes in are set to zero
6704     in the matrix
6705 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6708 
6709 .seealso: `MatCreateAIJ()`
6710 M*/
6711 
/* MatCreate_MPIAIJ - Constructor for the MATMPIAIJ matrix type.

   Allocates the Mat_MPIAIJ context, installs the function table, creates the stash
   used to buffer off-process entries set with MatSetValues(), zeroes the caches used
   by matrix-vector products and MatGetRow(), and registers by name the composed
   functions (preallocation, conversions, products, COO assembly) queried elsewhere
   with PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed functions; conversion routines for optional backends are
     guarded by the corresponding configure-time defines */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6791 
6792 /*@C
6793      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6794          and "off-diagonal" part of the matrix in CSR format.
6795 
6796    Collective
6797 
6798    Input Parameters:
6799 +  comm - MPI communicator
6800 .  m - number of local rows (Cannot be PETSC_DECIDE)
6801 .  n - This value should be the same as the local size used in creating the
6802        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6803        calculated if N is given) For square matrices n is almost always m.
6804 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6805 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6806 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6807 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6808 .   a - matrix values
6809 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6810 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6811 -   oa - matrix values
6812 
6813    Output Parameter:
6814 .   mat - the matrix
6815 
6816    Level: advanced
6817 
6818    Notes:
6819        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6820        must free the arrays once the matrix has been destroyed and not before.
6821 
6822        The i and j indices are 0 based
6823 
6824        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6825 
6826        This sets local rows and cannot be used to set off-processor values.
6827 
6828        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6829        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6830        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6831        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6832        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6833        communication if it is known that only local entries will be set.
6834 
6835 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6836           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6837 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* The user arrays are wrapped, not copied (see the manual page above); mark the matrix
     preallocated so no preallocation routine needs to be called later */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the "diagonal" block (local column indices) and the "off-diagonal" block
     (global column indices, per the contract documented above) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* Only local rows were provided, so assembly needs no off-process communication;
     the option is toggled back afterwards to restore default MatSetValues() behavior */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
6866 
/* Context attached to a product matrix C by the MPIAIJ backend implementation of
   AB/AtB/PtAP (see MatProductSymbolic_MPIAIJBACKEND / MatProductNumeric_MPIAIJBACKEND);
   freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* merge product->B's diag and off-diag blocks before multiplying (MATPRODUCT_AB only) */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6897 
6898 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6899 {
6900   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6901   PetscInt            i;
6902 
6903   PetscFunctionBegin;
6904   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6905   PetscCall(PetscFree(mmdata->bufa));
6906   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6907   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6908   PetscCall(MatDestroy(&mmdata->P_oth));
6909   PetscCall(MatDestroy(&mmdata->Bloc));
6910   PetscCall(PetscSFDestroy(&mmdata->sf));
6911   for (i = 0; i < mmdata->cp; i++) {
6912     PetscCall(MatDestroy(&mmdata->mp[i]));
6913   }
6914   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6915   PetscCall(PetscFree(mmdata->own[0]));
6916   PetscCall(PetscFree(mmdata->own));
6917   PetscCall(PetscFree(mmdata->off[0]));
6918   PetscCall(PetscFree(mmdata->off));
6919   PetscCall(PetscFree(mmdata));
6920   PetscFunctionReturn(0);
6921 }
6922 
6923 /* Copy selected n entries with indices in idx[] of A to v[].
6924    If idx is NULL, copy the whole data array of A to v[]
6925  */
6926 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6927 {
6928   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6929 
6930   PetscFunctionBegin;
6931   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6932   if (f) {
6933     PetscCall((*f)(A,n,idx,v));
6934   } else {
6935     const PetscScalar *vv;
6936 
6937     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6938     if (n && idx) {
6939       PetscScalar    *w = v;
6940       const PetscInt *oi = idx;
6941       PetscInt       j;
6942 
6943       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6944     } else {
6945       PetscCall(PetscArraycpy(v,vv,n));
6946     }
6947     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6948   }
6949   PetscFunctionReturn(0);
6950 }
6951 
/* MatProductNumeric_MPIAIJBACKEND - Numeric phase of the backend AB/AtB/PtAP products.

   Refreshes the temporary matrices (P_oth, Bloc) unless the symbolic-phase values can be
   reused, recomputes the intermediate sequential products, gathers their values into the
   COO buffers laid out by the symbolic phase, communicates the off-process contributions,
   and inserts everything into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* reuse can happen at most once, on the first numeric call after symbolic */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* number of off-process values produced by mp[i] */

    if (mmdata->mptmp[i]) continue; /* temporary products are inputs to later ones; their values are not gathered */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* number of on-process values produced by mp[i] */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); /* every value of mp[i] is on-process */
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
7000 
7001 /* Support for Pt * A, A * P, or Pt * A * P */
7002 #define MAX_NUMBER_INTERMEDIATE 4
7003 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7004 {
7005   Mat_Product            *product = C->product;
7006   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7007   Mat_MPIAIJ             *a,*p;
7008   MatMatMPIAIJBACKEND    *mmdata;
7009   ISLocalToGlobalMapping P_oth_l2g = NULL;
7010   IS                     glob = NULL;
7011   const char             *prefix;
7012   char                   pprefix[256];
7013   const PetscInt         *globidx,*P_oth_idx;
7014   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
7015   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
7016   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7017                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
7018                                                                                         /* a base offset; type-2: sparse with a local to global map table */
7019   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
7020 
7021   MatProductType         ptype;
7022   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
7023   PetscMPIInt            size;
7024 
7025   PetscFunctionBegin;
7026   MatCheckProduct(C,1);
7027   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
7028   ptype = product->type;
7029   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7030     ptype = MATPRODUCT_AB;
7031     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7032   }
7033   switch (ptype) {
7034   case MATPRODUCT_AB:
7035     A = product->A;
7036     P = product->B;
7037     m = A->rmap->n;
7038     n = P->cmap->n;
7039     M = A->rmap->N;
7040     N = P->cmap->N;
7041     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7042     break;
7043   case MATPRODUCT_AtB:
7044     P = product->A;
7045     A = product->B;
7046     m = P->cmap->n;
7047     n = A->cmap->n;
7048     M = P->cmap->N;
7049     N = A->cmap->N;
7050     hasoffproc = PETSC_TRUE;
7051     break;
7052   case MATPRODUCT_PtAP:
7053     A = product->A;
7054     P = product->B;
7055     m = P->cmap->n;
7056     n = P->cmap->n;
7057     M = P->cmap->N;
7058     N = P->cmap->N;
7059     hasoffproc = PETSC_TRUE;
7060     break;
7061   default:
7062     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7063   }
7064   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
7065   if (size == 1) hasoffproc = PETSC_FALSE;
7066 
7067   /* defaults */
7068   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
7069     mp[i]    = NULL;
7070     mptmp[i] = PETSC_FALSE;
7071     rmapt[i] = -1;
7072     cmapt[i] = -1;
7073     rmapa[i] = NULL;
7074     cmapa[i] = NULL;
7075   }
7076 
7077   /* customization */
7078   PetscCall(PetscNew(&mmdata));
7079   mmdata->reusesym = product->api_user;
7080   if (ptype == MATPRODUCT_AB) {
7081     if (product->api_user) {
7082       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
7083       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7084       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7085       PetscOptionsEnd();
7086     } else {
7087       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7088       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7089       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7090       PetscOptionsEnd();
7091     }
7092   } else if (ptype == MATPRODUCT_PtAP) {
7093     if (product->api_user) {
7094       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7095       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7096       PetscOptionsEnd();
7097     } else {
7098       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7099       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7100       PetscOptionsEnd();
7101     }
7102   }
7103   a = (Mat_MPIAIJ*)A->data;
7104   p = (Mat_MPIAIJ*)P->data;
7105   PetscCall(MatSetSizes(C,m,n,M,N));
7106   PetscCall(PetscLayoutSetUp(C->rmap));
7107   PetscCall(PetscLayoutSetUp(C->cmap));
7108   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7109   PetscCall(MatGetOptionsPrefix(C,&prefix));
7110 
7111   cp   = 0;
7112   switch (ptype) {
7113   case MATPRODUCT_AB: /* A * P */
7114     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7115 
7116     /* A_diag * P_local (merged or not) */
7117     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7118       /* P is product->B */
7119       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7120       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7121       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7122       PetscCall(MatProductSetFill(mp[cp],product->fill));
7123       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7124       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7125       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7126       mp[cp]->product->api_user = product->api_user;
7127       PetscCall(MatProductSetFromOptions(mp[cp]));
7128       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7129       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7130       PetscCall(ISGetIndices(glob,&globidx));
7131       rmapt[cp] = 1;
7132       cmapt[cp] = 2;
7133       cmapa[cp] = globidx;
7134       mptmp[cp] = PETSC_FALSE;
7135       cp++;
7136     } else { /* A_diag * P_diag and A_diag * P_off */
7137       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7138       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7139       PetscCall(MatProductSetFill(mp[cp],product->fill));
7140       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7141       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7142       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7143       mp[cp]->product->api_user = product->api_user;
7144       PetscCall(MatProductSetFromOptions(mp[cp]));
7145       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7146       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7147       rmapt[cp] = 1;
7148       cmapt[cp] = 1;
7149       mptmp[cp] = PETSC_FALSE;
7150       cp++;
7151       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7152       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7153       PetscCall(MatProductSetFill(mp[cp],product->fill));
7154       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7155       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7156       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7157       mp[cp]->product->api_user = product->api_user;
7158       PetscCall(MatProductSetFromOptions(mp[cp]));
7159       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7160       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7161       rmapt[cp] = 1;
7162       cmapt[cp] = 2;
7163       cmapa[cp] = p->garray;
7164       mptmp[cp] = PETSC_FALSE;
7165       cp++;
7166     }
7167 
7168     /* A_off * P_other */
7169     if (mmdata->P_oth) {
7170       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7171       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7172       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7173       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7174       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7175       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7176       PetscCall(MatProductSetFill(mp[cp],product->fill));
7177       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7178       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7179       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7180       mp[cp]->product->api_user = product->api_user;
7181       PetscCall(MatProductSetFromOptions(mp[cp]));
7182       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7183       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7184       rmapt[cp] = 1;
7185       cmapt[cp] = 2;
7186       cmapa[cp] = P_oth_idx;
7187       mptmp[cp] = PETSC_FALSE;
7188       cp++;
7189     }
7190     break;
7191 
7192   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7193     /* A is product->B */
7194     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7195     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7196       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7197       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7198       PetscCall(MatProductSetFill(mp[cp],product->fill));
7199       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7200       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7201       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7202       mp[cp]->product->api_user = product->api_user;
7203       PetscCall(MatProductSetFromOptions(mp[cp]));
7204       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7205       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7206       PetscCall(ISGetIndices(glob,&globidx));
7207       rmapt[cp] = 2;
7208       rmapa[cp] = globidx;
7209       cmapt[cp] = 2;
7210       cmapa[cp] = globidx;
7211       mptmp[cp] = PETSC_FALSE;
7212       cp++;
7213     } else {
7214       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7215       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7216       PetscCall(MatProductSetFill(mp[cp],product->fill));
7217       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7218       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7219       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7220       mp[cp]->product->api_user = product->api_user;
7221       PetscCall(MatProductSetFromOptions(mp[cp]));
7222       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7223       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7224       PetscCall(ISGetIndices(glob,&globidx));
7225       rmapt[cp] = 1;
7226       cmapt[cp] = 2;
7227       cmapa[cp] = globidx;
7228       mptmp[cp] = PETSC_FALSE;
7229       cp++;
7230       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7231       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7232       PetscCall(MatProductSetFill(mp[cp],product->fill));
7233       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7234       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7235       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7236       mp[cp]->product->api_user = product->api_user;
7237       PetscCall(MatProductSetFromOptions(mp[cp]));
7238       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7239       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7240       rmapt[cp] = 2;
7241       rmapa[cp] = p->garray;
7242       cmapt[cp] = 2;
7243       cmapa[cp] = globidx;
7244       mptmp[cp] = PETSC_FALSE;
7245       cp++;
7246     }
7247     break;
7248   case MATPRODUCT_PtAP:
7249     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7250     /* P is product->B */
7251     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7252     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7253     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7254     PetscCall(MatProductSetFill(mp[cp],product->fill));
7255     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7256     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7257     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7258     mp[cp]->product->api_user = product->api_user;
7259     PetscCall(MatProductSetFromOptions(mp[cp]));
7260     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7261     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7262     PetscCall(ISGetIndices(glob,&globidx));
7263     rmapt[cp] = 2;
7264     rmapa[cp] = globidx;
7265     cmapt[cp] = 2;
7266     cmapa[cp] = globidx;
7267     mptmp[cp] = PETSC_FALSE;
7268     cp++;
7269     if (mmdata->P_oth) {
7270       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7271       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7272       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7273       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7274       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7275       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7276       PetscCall(MatProductSetFill(mp[cp],product->fill));
7277       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7278       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7279       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7280       mp[cp]->product->api_user = product->api_user;
7281       PetscCall(MatProductSetFromOptions(mp[cp]));
7282       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7283       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7284       mptmp[cp] = PETSC_TRUE;
7285       cp++;
7286       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7287       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7288       PetscCall(MatProductSetFill(mp[cp],product->fill));
7289       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7290       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7291       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7292       mp[cp]->product->api_user = product->api_user;
7293       PetscCall(MatProductSetFromOptions(mp[cp]));
7294       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7295       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7296       rmapt[cp] = 2;
7297       rmapa[cp] = globidx;
7298       cmapt[cp] = 2;
7299       cmapa[cp] = P_oth_idx;
7300       mptmp[cp] = PETSC_FALSE;
7301       cp++;
7302     }
7303     break;
7304   default:
7305     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7306   }
7307   /* sanity check */
7308   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7309 
7310   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7311   for (i = 0; i < cp; i++) {
7312     mmdata->mp[i]    = mp[i];
7313     mmdata->mptmp[i] = mptmp[i];
7314   }
7315   mmdata->cp = cp;
7316   C->product->data       = mmdata;
7317   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7318   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7319 
7320   /* memory type */
7321   mmdata->mtype = PETSC_MEMTYPE_HOST;
7322   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7323   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7324   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7325   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7326 
7327   /* prepare coo coordinates for values insertion */
7328 
7329   /* count total nonzeros of those intermediate seqaij Mats
7330     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7331     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7332     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7333   */
7334   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7335     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7336     if (mptmp[cp]) continue;
7337     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7338       const PetscInt *rmap = rmapa[cp];
7339       const PetscInt mr = mp[cp]->rmap->n;
7340       const PetscInt rs = C->rmap->rstart;
7341       const PetscInt re = C->rmap->rend;
7342       const PetscInt *ii  = mm->i;
7343       for (i = 0; i < mr; i++) {
7344         const PetscInt gr = rmap[i];
7345         const PetscInt nz = ii[i+1] - ii[i];
7346         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7347         else ncoo_oown += nz; /* this row is local */
7348       }
7349     } else ncoo_d += mm->nz;
7350   }
7351 
7352   /*
7353     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7354 
7355     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7356 
7357     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7358 
7359     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7360     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7361     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7362 
7363     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7364     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7365   */
7366   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7367   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7368 
7369   /* gather (i,j) of nonzeros inserted by remote procs */
7370   if (hasoffproc) {
7371     PetscSF  msf;
7372     PetscInt ncoo2,*coo_i2,*coo_j2;
7373 
7374     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7375     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7376     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7377 
7378     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7379       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7380       PetscInt   *idxoff = mmdata->off[cp];
7381       PetscInt   *idxown = mmdata->own[cp];
7382       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7383         const PetscInt *rmap = rmapa[cp];
7384         const PetscInt *cmap = cmapa[cp];
7385         const PetscInt *ii  = mm->i;
7386         PetscInt       *coi = coo_i + ncoo_o;
7387         PetscInt       *coj = coo_j + ncoo_o;
7388         const PetscInt mr = mp[cp]->rmap->n;
7389         const PetscInt rs = C->rmap->rstart;
7390         const PetscInt re = C->rmap->rend;
7391         const PetscInt cs = C->cmap->rstart;
7392         for (i = 0; i < mr; i++) {
7393           const PetscInt *jj = mm->j + ii[i];
7394           const PetscInt gr  = rmap[i];
7395           const PetscInt nz  = ii[i+1] - ii[i];
7396           if (gr < rs || gr >= re) { /* this is an offproc row */
7397             for (j = ii[i]; j < ii[i+1]; j++) {
7398               *coi++ = gr;
7399               *idxoff++ = j;
7400             }
7401             if (!cmapt[cp]) { /* already global */
7402               for (j = 0; j < nz; j++) *coj++ = jj[j];
7403             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7404               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7405             } else { /* offdiag */
7406               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7407             }
7408             ncoo_o += nz;
7409           } else { /* this is a local row */
7410             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7411           }
7412         }
7413       }
7414       mmdata->off[cp + 1] = idxoff;
7415       mmdata->own[cp + 1] = idxown;
7416     }
7417 
7418     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7419     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7420     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7421     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7422     ncoo = ncoo_d + ncoo_oown + ncoo2;
7423     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7424     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7425     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7426     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7427     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7428     PetscCall(PetscFree2(coo_i,coo_j));
7429     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7430     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7431     coo_i = coo_i2;
7432     coo_j = coo_j2;
7433   } else { /* no offproc values insertion */
7434     ncoo = ncoo_d;
7435     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7436 
7437     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7438     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7439     PetscCall(PetscSFSetUp(mmdata->sf));
7440   }
7441   mmdata->hasoffproc = hasoffproc;
7442 
7443   /* gather (i,j) of nonzeros inserted locally */
7444   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7445     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7446     PetscInt       *coi = coo_i + ncoo_d;
7447     PetscInt       *coj = coo_j + ncoo_d;
7448     const PetscInt *jj  = mm->j;
7449     const PetscInt *ii  = mm->i;
7450     const PetscInt *cmap = cmapa[cp];
7451     const PetscInt *rmap = rmapa[cp];
7452     const PetscInt mr = mp[cp]->rmap->n;
7453     const PetscInt rs = C->rmap->rstart;
7454     const PetscInt re = C->rmap->rend;
7455     const PetscInt cs = C->cmap->rstart;
7456 
7457     if (mptmp[cp]) continue;
7458     if (rmapt[cp] == 1) { /* consecutive rows */
7459       /* fill coo_i */
7460       for (i = 0; i < mr; i++) {
7461         const PetscInt gr = i + rs;
7462         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7463       }
7464       /* fill coo_j */
7465       if (!cmapt[cp]) { /* type-0, already global */
7466         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7467       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7468         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7469       } else { /* type-2, local to global for sparse columns */
7470         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7471       }
7472       ncoo_d += mm->nz;
7473     } else if (rmapt[cp] == 2) { /* sparse rows */
7474       for (i = 0; i < mr; i++) {
7475         const PetscInt *jj = mm->j + ii[i];
7476         const PetscInt gr  = rmap[i];
7477         const PetscInt nz  = ii[i+1] - ii[i];
7478         if (gr >= rs && gr < re) { /* local rows */
7479           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7480           if (!cmapt[cp]) { /* type-0, already global */
7481             for (j = 0; j < nz; j++) *coj++ = jj[j];
7482           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7483             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7484           } else { /* type-2, local to global for sparse columns */
7485             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7486           }
7487           ncoo_d += nz;
7488         }
7489       }
7490     }
7491   }
7492   if (glob) {
7493     PetscCall(ISRestoreIndices(glob,&globidx));
7494   }
7495   PetscCall(ISDestroy(&glob));
7496   if (P_oth_l2g) {
7497     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7498   }
7499   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7500   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7501   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7502 
7503   /* preallocate with COO data */
7504   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7505   PetscCall(PetscFree2(coo_i,coo_j));
7506   PetscFunctionReturn(0);
7507 }
7508 
7509 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7510 {
7511   Mat_Product *product = mat->product;
7512 #if defined(PETSC_HAVE_DEVICE)
7513   PetscBool    match   = PETSC_FALSE;
7514   PetscBool    usecpu  = PETSC_FALSE;
7515 #else
7516   PetscBool    match   = PETSC_TRUE;
7517 #endif
7518 
7519   PetscFunctionBegin;
7520   MatCheckProduct(mat,1);
7521 #if defined(PETSC_HAVE_DEVICE)
7522   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7523     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7524   }
7525   if (match) { /* we can always fallback to the CPU if requested */
7526     switch (product->type) {
7527     case MATPRODUCT_AB:
7528       if (product->api_user) {
7529         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7530         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7531         PetscOptionsEnd();
7532       } else {
7533         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7534         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7535         PetscOptionsEnd();
7536       }
7537       break;
7538     case MATPRODUCT_AtB:
7539       if (product->api_user) {
7540         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7541         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7542         PetscOptionsEnd();
7543       } else {
7544         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7545         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7546         PetscOptionsEnd();
7547       }
7548       break;
7549     case MATPRODUCT_PtAP:
7550       if (product->api_user) {
7551         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7552         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7553         PetscOptionsEnd();
7554       } else {
7555         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7556         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7557         PetscOptionsEnd();
7558       }
7559       break;
7560     default:
7561       break;
7562     }
7563     match = (PetscBool)!usecpu;
7564   }
7565 #endif
7566   if (match) {
7567     switch (product->type) {
7568     case MATPRODUCT_AB:
7569     case MATPRODUCT_AtB:
7570     case MATPRODUCT_PtAP:
7571       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7572       break;
7573     default:
7574       break;
7575     }
7576   }
7577   /* fallback to MPIAIJ ops */
7578   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7579   PetscFunctionReturn(0);
7580 }
7581 
7582 /*
7583    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7584 
7585    n - the number of block indices in cc[]
7586    cc - the block indices (must be large enough to contain the indices)
7587 */
7588 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7589 {
7590   PetscInt       cnt = -1,nidx,j;
7591   const PetscInt *idx;
7592 
7593   PetscFunctionBegin;
7594   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7595   if (nidx) {
7596     cnt = 0;
7597     cc[cnt] = idx[0]/bs;
7598     for (j=1; j<nidx; j++) {
7599       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7600     }
7601   }
7602   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7603   *n = cnt+1;
7604   PetscFunctionReturn(0);
7605 }
7606 
7607 /*
7608     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7609 
7610     ncollapsed - the number of block indices
7611     collapsed - the block indices (must be large enough to contain the indices)
7612 */
7613 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7614 {
7615   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7616 
7617   PetscFunctionBegin;
7618   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7619   for (i=start+1; i<start+bs; i++) {
7620     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7621     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7622     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7623   }
7624   *ncollapsed = nprev;
7625   if (collapsed) *collapsed  = cprev;
7626   PetscFunctionReturn(0);
7627 }
7628 
7629 /* -------------------------------------------------------------------------- */
7630 /*
7631  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7632 
7633  Input Parameter:
7634  + Amat - matrix
7635  . symmetrize - make the result symmetric
7636  - scale - scale with diagonal
7637 
7638  Output Parameter:
7639  . a_Gmat - output scalar graph >= 0
7640 
7641  */
7642 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7643 {
7644   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7645   MPI_Comm       comm;
7646   Mat            Gmat;
7647   PetscBool      ismpiaij,isseqaij;
7648   Mat            a, b, c;
7649   MatType        jtype;
7650 
7651   PetscFunctionBegin;
7652   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7653   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7654   PetscCall(MatGetSize(Amat, &MM, &NN));
7655   PetscCall(MatGetBlockSize(Amat, &bs));
7656   nloc = (Iend-Istart)/bs;
7657 
7658   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7659   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7660   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7661 
7662   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7663   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7664      implementation */
7665   if (bs > 1) {
7666     PetscCall(MatGetType(Amat,&jtype));
7667     PetscCall(MatCreate(comm, &Gmat));
7668     PetscCall(MatSetType(Gmat, jtype));
7669     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7670     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7671     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7672       PetscInt  *d_nnz, *o_nnz;
7673       MatScalar *aa,val,AA[4096];
7674       PetscInt  *aj,*ai,AJ[4096],nc;
7675       if (isseqaij) { a = Amat; b = NULL; }
7676       else {
7677         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7678         a = d->A; b = d->B;
7679       }
7680       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7681       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7682       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7683         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7684         const PetscInt *cols;
7685         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7686           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7687           nnz[brow/bs] = jj/bs;
7688           if (jj%bs) ok = 0;
7689           if (cols) j0 = cols[0];
7690           else j0 = -1;
7691           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7692           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7693           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7694             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7695             if (jj%bs) ok = 0;
7696             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7697             if (nnz[brow/bs] != jj/bs) ok = 0;
7698             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7699           }
7700           if (!ok) {
7701             PetscCall(PetscFree2(d_nnz,o_nnz));
7702             goto old_bs;
7703           }
7704         }
7705         PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
7706       }
7707       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7708       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7709       PetscCall(PetscFree2(d_nnz,o_nnz));
7710       // diag
7711       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7712         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7713         ai = aseq->i;
7714         n  = ai[brow+1] - ai[brow];
7715         aj = aseq->j + ai[brow];
7716         for (int k=0; k<n; k += bs) { // block columns
7717           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7718           val = 0;
7719           for (int ii=0; ii<bs; ii++) { // rows in block
7720             aa = aseq->a + ai[brow+ii] + k;
7721             for (int jj=0; jj<bs; jj++) { // columns in block
7722               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7723             }
7724           }
7725           AA[k/bs] = val;
7726         }
7727         grow = Istart/bs + brow/bs;
7728         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7729       }
7730       // off-diag
7731       if (ismpiaij) {
7732         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7733         const PetscScalar *vals;
7734         const PetscInt    *cols, *garray = aij->garray;
7735         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
7736         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7737           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7738           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7739             AA[k/bs] = 0;
7740             AJ[cidx] = garray[cols[k]]/bs;
7741           }
7742           nc = ncols/bs;
7743           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7744           for (int ii=0; ii<bs; ii++) { // rows in block
7745             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7746             for (int k=0; k<ncols; k += bs) {
7747               for (int jj=0; jj<bs; jj++) { // cols in block
7748                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7749               }
7750             }
7751             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7752           }
7753           grow = Istart/bs + brow/bs;
7754           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7755         }
7756       }
7757       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7758       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7759     } else {
7760       const PetscScalar *vals;
7761       const PetscInt    *idx;
7762       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7763       old_bs:
7764       /*
7765        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7766        */
7767       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7768       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7769       if (isseqaij) {
7770         PetscInt max_d_nnz;
7771         /*
7772          Determine exact preallocation count for (sequential) scalar matrix
7773          */
7774         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7775         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7776         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7777         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7778           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7779         }
7780         PetscCall(PetscFree3(w0,w1,w2));
7781       } else if (ismpiaij) {
7782         Mat            Daij,Oaij;
7783         const PetscInt *garray;
7784         PetscInt       max_d_nnz;
7785         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7786         /*
7787          Determine exact preallocation count for diagonal block portion of scalar matrix
7788          */
7789         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7790         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7791         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7792         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7793           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7794         }
7795         PetscCall(PetscFree3(w0,w1,w2));
7796         /*
7797          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7798          */
7799         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7800           o_nnz[jj] = 0;
7801           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7802             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7803             o_nnz[jj] += ncols;
7804             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7805           }
7806           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7807         }
7808       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7809       /* get scalar copy (norms) of matrix */
7810       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7811       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7812       PetscCall(PetscFree2(d_nnz,o_nnz));
7813       for (Ii = Istart; Ii < Iend; Ii++) {
7814         PetscInt dest_row = Ii/bs;
7815         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7816         for (jj=0; jj<ncols; jj++) {
7817           PetscInt    dest_col = idx[jj]/bs;
7818           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7819           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7820         }
7821         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7822       }
7823       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7824       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7825     }
7826   } else {
7827     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7828      procedure via MatAbs API */
7829     /* just copy scalar matrix & abs() */
7830     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7831     if (isseqaij) { a = Gmat; b = NULL; }
7832     else {
7833       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7834       a = d->A; b = d->B;
7835     }
7836     /* abs */
7837     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7838       MatInfo     info;
7839       PetscScalar *avals;
7840       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7841       PetscCall(MatSeqAIJGetArray(c,&avals));
7842       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7843       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7844     }
7845   }
7846   if (symmetrize) {
7847     PetscBool isset,issym;
7848     PetscCall(MatIsSymmetricKnown(Amat,&isset,&issym));
7849     if (!isset || !issym) {
7850       Mat matTrans;
7851       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7852       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7853       PetscCall(MatDestroy(&matTrans));
7854     }
7855     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7856   } else {
7857     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7858   }
7859   if (scale) {
7860     /* scale c for all diagonal values = 1 or -1 */
7861     Vec               diag;
7862     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7863     PetscCall(MatGetDiagonal(Gmat, diag));
7864     PetscCall(VecReciprocal(diag));
7865     PetscCall(VecSqrtAbs(diag));
7866     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7867     PetscCall(VecDestroy(&diag));
7868   }
7869   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7870   *a_Gmat = Gmat;
7871   PetscFunctionReturn(0);
7872 }
7873 
7874 /* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter out entries of a scalar graph matrix whose absolute value is at or below a threshold.
     With vfilter < 0 this does nothing, so it should not be called in that case.

   Collective on Mat

   Input Parameters:
+   Gmat - the graph
-   vfilter - threshold parameter [0,1)

   Output Parameter:
.   filteredG - output filtered scalar graph

   Level: developer

   Notes:
    This is called before graph coarseners are called.
    This could go into Mat; move 'symm' to GAMG.

.seealso: `PCGAMGSetThreshold()`
@*/
7896 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7897 {
7898   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7899   Mat               tGmat;
7900   MPI_Comm          comm;
7901   const PetscScalar *vals;
7902   const PetscInt    *idx;
7903   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7904   MatScalar         *AA; // this is checked in graph
7905   PetscBool         isseqaij;
7906   Mat               a, b, c;
7907   MatType           jtype;
7908 
7909   PetscFunctionBegin;
7910   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7911   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7912   PetscCall(MatGetType(Gmat,&jtype));
7913   PetscCall(MatCreate(comm, &tGmat));
7914   PetscCall(MatSetType(tGmat, jtype));
7915 
7916   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7917                Also, if the matrix is symmetric, can we skip this
7918                operation? It can be very expensive on large matrices. */
7919 
7920   // global sizes
7921   PetscCall(MatGetSize(Gmat, &MM, &NN));
7922   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7923   nloc = Iend - Istart;
7924   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7925   if (isseqaij) { a = Gmat; b = NULL; }
7926   else {
7927     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7928     a = d->A; b = d->B;
7929     garray = d->garray;
7930   }
7931   /* Determine upper bound on non-zeros needed in new filtered matrix */
7932   for (PetscInt row=0; row < nloc; row++) {
7933     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7934     d_nnz[row] = ncols;
7935     if (ncols>maxcols) maxcols=ncols;
7936     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7937   }
7938   if (b) {
7939     for (PetscInt row=0; row < nloc; row++) {
7940       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7941       o_nnz[row] = ncols;
7942       if (ncols>maxcols) maxcols=ncols;
7943       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7944     }
7945   }
7946   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7947   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7948   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7949   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7950   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7951   PetscCall(PetscFree2(d_nnz,o_nnz));
7952   //
7953   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7954   nnz0 = nnz1 = 0;
7955   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7956     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7957       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7958       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7959         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7960         if (PetscRealPart(sv) > vfilter) {
7961           nnz1++;
7962           PetscInt cid = idx[jj] + Istart; //diag
7963           if (c!=a) cid = garray[idx[jj]];
7964           AA[ncol_row] = vals[jj];
7965           AJ[ncol_row] = cid;
7966           ncol_row++;
7967         }
7968       }
7969       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7970       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7971     }
7972   }
7973   PetscCall(PetscFree2(AA,AJ));
7974   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7975   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7976   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7977 
7978   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7979                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7980                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7981 
7982   *filteredG = tGmat;
7983   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7984   PetscFunctionReturn(0);
7985 }
7986 
7987 /*
7988     Special version for direct calls from Fortran
7989 */
7990 #include <petsc/private/fortranimpl.h>
7991 
7992 /* Change these macros so can be used in void function */
7993 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7994 #undef  PetscCall
7995 #define PetscCall(...) do {                                                                    \
7996     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7997     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7998       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7999       return;                                                                                  \
8000     }                                                                                          \
8001   } while (0)
8002 
8003 #undef SETERRQ
8004 #define SETERRQ(comm,ierr,...) do {                                                            \
8005     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
8006     return;                                                                                    \
8007   } while (0)
8008 
8009 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8010 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8011 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8012 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8013 #else
8014 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path of MatSetValues() for MATMPIAIJ matrices.

  All arguments arrive as pointers (Fortran pass-by-reference); errors are reported through
  *_ierr via the redefined PetscCall()/SETERRQ() macros above, which return from this void
  function instead of returning an error code.

  NOTE(review): this body mirrors MatSetValues_MPIAIJ(); the local variable names
  (rp1/ap1/low1/..., rp2/ap2/low2/...) are required by the MatSetValues_SeqAIJ_A_Private()
  and MatSetValues_SeqAIJ_B_Private() macros, which reference them by name.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  /* dereference the Fortran pointer arguments once up front */
  Mat          mat  = *mmat;
  PetscInt     m    = *mm, n = *mn;
  InsertMode   addv = *maddv;
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;

  MatCheckPreallocated(mat,1);
  /* lock in the insert mode; mixing ADD_VALUES and INSERT_VALUES is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;                    /* diagonal block */
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;                    /* off-diagonal block */
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* row is owned by this process: set up search state for both blocks of this local row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];   /* column indices of diag-block row */
        ap1      = aa + ai[row];   /* values of diag-block row */
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];   /* column indices of off-diag-block row */
        ap2      = ba + bi[row];   /* values of off-diag-block row */
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* v[] layout depends on the row/column orientation option */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently skipped */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* column lies in the off-diagonal block */
            if (mat->was_assembled) {
              /* after assembly off-diag columns are stored compacted; translate the
                 global column through the colmap */
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* new off-diag column not present in the assembled pattern: disassemble
                   so B uses global column indices again, then insert directly */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B stores global column indices */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
8128 
8129 /* Undefining these here since they were redefined from their original definition above! No
8130  * other PETSc functions should be defined past this point, as it is impossible to recover the
8131  * original definitions */
8132 #undef PetscCall
8133 #undef SETERRQ
8134