xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/*
  MatGetRowIJ_MPIAIJ - Returns compressed-row (ia,ja) index information for the locally
  owned rows by first merging the diagonal and off-diagonal parts into one sequential
  AIJ matrix and delegating to its MatGetRowIJ().

  The sequential matrix B is composed onto A under the key "MatGetRowIJ_MPIAIJ" so the
  matching MatRestoreRowIJ_MPIAIJ() can retrieve it; the composition keeps B alive
  until the restore call destroys it.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  /* gather the local rows (diagonal + off-diagonal parts) into one SeqAIJ matrix */
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  /* stash B on A so the restore routine can find (and later destroy) it */
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}
19 
/*
  MatRestoreRowIJ_MPIAIJ - Releases the (ia,ja) arrays obtained with MatGetRowIJ_MPIAIJ().

  Retrieves the sequential matrix B that MatGetRowIJ_MPIAIJ() composed onto A, restores
  its row IJ arrays, and drops this routine's reference to it.  (A's composition still
  holds a reference; B is fully freed when A is destroyed or the key is recomposed.)
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
{
  Mat            B;

  PetscFunctionBegin;
  /* B was composed onto A by MatGetRowIJ_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
30 
31 /*MC
32    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
33 
34    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
35    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
37   for communicators controlling multiple processes.  It is recommended that you call both of
38   the above preallocation routines for simplicity.
39 
40    Options Database Keys:
41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
42 
43   Developer Notes:
44     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
45    enough exist.
46 
47   Level: beginner
48 
49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
50 M*/
51 
52 /*MC
53    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
54 
55    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
56    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
57    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
58   for communicators controlling multiple processes.  It is recommended that you call both of
59   the above preallocation routines for simplicity.
60 
61    Options Database Keys:
62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
63 
64   Level: beginner
65 
.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
67 M*/
68 
69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
70 {
71   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
72 
73   PetscFunctionBegin;
74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
75   A->boundtocpu = flg;
76 #endif
77   if (a->A) {
78     PetscCall(MatBindToCPU(a->A,flg));
79   }
80   if (a->B) {
81     PetscCall(MatBindToCPU(a->B,flg));
82   }
83 
84   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
85    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
86    * to differ from the parent matrix. */
87   if (a->lvec) {
88     PetscCall(VecBindToCPU(a->lvec,flg));
89   }
90   if (a->diag) {
91     PetscCall(VecBindToCPU(a->diag,flg));
92   }
93 
94   PetscFunctionReturn(0);
95 }
96 
97 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
98 {
99   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
100 
101   PetscFunctionBegin;
102   if (mat->A) {
103     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
104     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
105   }
106   PetscFunctionReturn(0);
107 }
108 
/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set (global row numbers) of the locally
  owned rows that contain at least one stored value different from 0.0.

  Sets *keptrows to NULL when no process has a zero row, so callers can cheaply detect
  the common "all rows nonzero" case.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;  /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;  /* off-diagonal block */
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: count the locally "zero" rows (no stored entries, or all stored values 0.0) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;  /* structurally empty row */
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;  /* found a nonzero: row is kept, not counted */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;  /* row has entries but all stored values are exactly zero */
ok1:;
  }
  /* if no process found a zero row, leave *keptrows NULL and return */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global numbers of the rows that do contain a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;  /* store global row number */
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows[] (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
177 
178 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
179 {
180   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
181   PetscBool         cong;
182 
183   PetscFunctionBegin;
184   PetscCall(MatHasCongruentLayouts(Y,&cong));
185   if (Y->assembled && cong) {
186     PetscCall(MatDiagonalSet(aij->A,D,is));
187   } else {
188     PetscCall(MatDiagonalSet_Default(Y,D,is));
189   }
190   PetscFunctionReturn(0);
191 }
192 
193 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
194 {
195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
196   PetscInt       i,rstart,nrows,*rows;
197 
198   PetscFunctionBegin;
199   *zrows = NULL;
200   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
201   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
202   for (i=0; i<nrows; i++) rows[i] += rstart;
203   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
204   PetscFunctionReturn(0);
205 }
206 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1/2/inf norm, sum,
  or mean of real or imaginary parts) over the whole parallel matrix.

  reductions[] must have length equal to the global number of columns; every process
  receives the complete result via an allreduce.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;  /* garray: local off-diagonal column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));  /* zeroed accumulator over all global columns */
  /* NOTE(review): get/restore with an unused pointer — presumably forces device-resident
     values to be synchronized to the host before a_aij->a / b_aij->a are read directly; confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate sums of squares; square roots are taken after the allreduce below */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* finish the 2-norm: sqrt of the summed squares */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide by the global number of rows */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
272 
273 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
274 {
275   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
276   IS              sis,gis;
277   const PetscInt  *isis,*igis;
278   PetscInt        n,*iis,nsis,ngis,rstart,i;
279 
280   PetscFunctionBegin;
281   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
282   PetscCall(MatFindNonzeroRows(a->B,&gis));
283   PetscCall(ISGetSize(gis,&ngis));
284   PetscCall(ISGetSize(sis,&nsis));
285   PetscCall(ISGetIndices(sis,&isis));
286   PetscCall(ISGetIndices(gis,&igis));
287 
288   PetscCall(PetscMalloc1(ngis+nsis,&iis));
289   PetscCall(PetscArraycpy(iis,igis,ngis));
290   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
291   n    = ngis + nsis;
292   PetscCall(PetscSortRemoveDupsInt(&n,iis));
293   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
294   for (i=0; i<n; i++) iis[i] += rstart;
295   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
296 
297   PetscCall(ISRestoreIndices(sis,&isis));
298   PetscCall(ISRestoreIndices(gis,&igis));
299   PetscCall(ISDestroy(&sis));
300   PetscCall(ISDestroy(&gis));
301   PetscFunctionReturn(0);
302 }
303 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash-table cost; without it it is not scalable
  (each processor has an order-N integer array) but access is fast.
*/
/*
  Builds aij->colmap, which maps a global column number to (local off-diagonal column
  index)+1; a lookup result of 0 therefore means "column not present in B".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;  /* number of off-diagonal columns in use */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table holding only the columns actually present in B */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    /* keys and values are shifted by +1 because 0 is the "missing" sentinel */
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* non-scalable variant: dense array of length (global number of columns)+1 */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
330 
/*
  MatSetValues_SeqAIJ_A_Private - Inlined insertion of one value into the diagonal
  block A.  Narrows the search window with a short binary search, then scans for
  `col`; on a hit the value is added or overwritten (per addv), otherwise a new
  nonzero is inserted, reallocating and shifting the row as needed.  Relies on many
  locals declared by the caller (rp1/ap1/low1/high1/nrow1/rmax1/lastcol1/_i/t/N,
  a, aa, ai, aj, aimax, ailen, am, nonew, ignorezeroentries).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
367 
/*
  MatSetValues_SeqAIJ_B_Private - Same insertion logic as MatSetValues_SeqAIJ_A_Private()
  but for the off-diagonal block B, using the caller's rp2/ap2/low2/high2/nrow2/rmax2/
  lastcol2 search state and b, ba, bi, bj, bimax, bilen, bm.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
403 
/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one (globally numbered,
  locally owned) row with the entries of v, which must be ordered by increasing
  global column number: off-diagonal entries left of the diagonal block first, then
  the diagonal block, then the remaining off-diagonal entries.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));  /* diag = first global row (== column) owned here */
  row  = row - diag;  /* convert to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* garray translates B's local columns to global; stop at the first entry past the diagonal block */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* the first l entries of v belong to B, left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
441 
442 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
443 {
444   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
445   PetscScalar    value = 0.0;
446   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
447   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
448   PetscBool      roworiented = aij->roworiented;
449 
450   /* Some Variables required in the macro */
451   Mat        A                    = aij->A;
452   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
453   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
454   PetscBool  ignorezeroentries    = a->ignorezeroentries;
455   Mat        B                    = aij->B;
456   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
457   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
458   MatScalar  *aa,*ba;
459   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
460   PetscInt   nonew;
461   MatScalar  *ap1,*ap2;
462 
463   PetscFunctionBegin;
464   PetscCall(MatSeqAIJGetArray(A,&aa));
465   PetscCall(MatSeqAIJGetArray(B,&ba));
466   for (i=0; i<m; i++) {
467     if (im[i] < 0) continue;
468     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
469     if (im[i] >= rstart && im[i] < rend) {
470       row      = im[i] - rstart;
471       lastcol1 = -1;
472       rp1      = aj + ai[row];
473       ap1      = aa + ai[row];
474       rmax1    = aimax[row];
475       nrow1    = ailen[row];
476       low1     = 0;
477       high1    = nrow1;
478       lastcol2 = -1;
479       rp2      = bj + bi[row];
480       ap2      = ba + bi[row];
481       rmax2    = bimax[row];
482       nrow2    = bilen[row];
483       low2     = 0;
484       high2    = nrow2;
485 
486       for (j=0; j<n; j++) {
487         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
488         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
489         if (in[j] >= cstart && in[j] < cend) {
490           col   = in[j] - cstart;
491           nonew = a->nonew;
492           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
493         } else if (in[j] < 0) continue;
494         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
495         else {
496           if (mat->was_assembled) {
497             if (!aij->colmap) {
498               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
499             }
500 #if defined(PETSC_USE_CTABLE)
501             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
502             col--;
503 #else
504             col = aij->colmap[in[j]] - 1;
505 #endif
506             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
507               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
508               col  =  in[j];
509               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
510               B        = aij->B;
511               b        = (Mat_SeqAIJ*)B->data;
512               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
513               rp2      = bj + bi[row];
514               ap2      = ba + bi[row];
515               rmax2    = bimax[row];
516               nrow2    = bilen[row];
517               low2     = 0;
518               high2    = nrow2;
519               bm       = aij->B->rmap->n;
520               ba       = b->a;
521             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
522               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
523                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
524               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
525             }
526           } else col = in[j];
527           nonew = b->nonew;
528           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
529         }
530       }
531     } else {
532       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
533       if (!aij->donotstash) {
534         mat->assembled = PETSC_FALSE;
535         if (roworiented) {
536           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
537         } else {
538           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
539         }
540       }
541     }
542   }
543   PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
544   PetscCall(MatSeqAIJRestoreArray(B,&ba));
545   PetscFunctionReturn(0);
546 }
547 
548 /*
549     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
550     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
552 */
553 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
554 {
555   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
556   Mat            A           = aij->A; /* diagonal part of the matrix */
557   Mat            B           = aij->B; /* offdiagonal part of the matrix */
558   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
559   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
560   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
561   PetscInt       *ailen      = a->ilen,*aj = a->j;
562   PetscInt       *bilen      = b->ilen,*bj = b->j;
563   PetscInt       am          = aij->A->rmap->n,j;
564   PetscInt       diag_so_far = 0,dnz;
565   PetscInt       offd_so_far = 0,onz;
566 
567   PetscFunctionBegin;
568   /* Iterate over all rows of the matrix */
569   for (j=0; j<am; j++) {
570     dnz = onz = 0;
571     /*  Iterate over all non-zero columns of the current row */
572     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
573       /* If column is in the diagonal */
574       if (mat_j[col] >= cstart && mat_j[col] < cend) {
575         aj[diag_so_far++] = mat_j[col] - cstart;
576         dnz++;
577       } else { /* off-diagonal entries */
578         bj[offd_so_far++] = mat_j[col];
579         onz++;
580       }
581     }
582     ailen[j] = dnz;
583     bilen[j] = onz;
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 /*
589     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
590     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
592     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
593     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
594 */
595 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
596 {
597   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
598   Mat            A      = aij->A; /* diagonal part of the matrix */
599   Mat            B      = aij->B; /* offdiagonal part of the matrix */
600   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
601   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
602   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
603   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
604   PetscInt       *ailen = a->ilen,*aj = a->j;
605   PetscInt       *bilen = b->ilen,*bj = b->j;
606   PetscInt       am     = aij->A->rmap->n,j;
607   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
608   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
609   PetscScalar    *aa = a->a,*ba = b->a;
610 
611   PetscFunctionBegin;
612   /* Iterate over all rows of the matrix */
613   for (j=0; j<am; j++) {
614     dnz_row = onz_row = 0;
615     rowstart_offd = full_offd_i[j];
616     rowstart_diag = full_diag_i[j];
617     /*  Iterate over all non-zero columns of the current row */
618     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
619       /* If column is in the diagonal */
620       if (mat_j[col] >= cstart && mat_j[col] < cend) {
621         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
622         aa[rowstart_diag+dnz_row] = mat_a[col];
623         dnz_row++;
624       } else { /* off-diagonal entries */
625         bj[rowstart_offd+onz_row] = mat_j[col];
626         ba[rowstart_offd+onz_row] = mat_a[col];
627         onz_row++;
628       }
629     }
630     ailen[j] = dnz_row;
631     bilen[j] = onz_row;
632   }
633   PetscFunctionReturn(0);
634 }
635 
/*
  MatGetValues_MPIAIJ - Retrieves a block of values; only locally owned rows may be
  requested.  Entries whose column is not stored on this process are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal block: translate the global column through the colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (or a non-matching garray entry) means the column is not stored locally */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
674 
/*
  MatAssemblyBegin_MPIAIJ - Starts communicating the stashed off-process entries to
  their owning processes; the matching receives are drained in MatAssemblyEnd_MPIAIJ().
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so there is nothing to communicate */
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
688 
/*
   Completes assembly of a parallel AIJ matrix: receives and inserts entries
   stashed by other ranks, assembles the diagonal (A) and off-diagonal (B)
   sequential blocks, handles collective disassembly/reassembly of B, and
   updates the matrix's global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain all incoming stash messages; flg is false once none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* collective: MPI_PROD of was_assembled is true only if every rank was assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter/ghost-vector machinery for MatMult */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  /* inode routines are disabled for the off-diagonal block */
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* row-access work arrays are stale after assembly; rebuilt on demand */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  /* any cached diagonal is invalidated by newly inserted values */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
769 
770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
771 {
772   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
773 
774   PetscFunctionBegin;
775   PetscCall(MatZeroEntries(l->A));
776   PetscCall(MatZeroEntries(l->B));
777   PetscFunctionReturn(0);
778 }
779 
/*
   Zeroes the (globally numbered) rows in rows[], optionally placing diag on
   the diagonal, and optionally fixing the right-hand side b so that the
   solution at those rows equals x. Collective; each rank zeroes only the
   rows it owns.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for each locally zeroed row, so the row equation yields x_i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember block nonzero states to detect pattern changes afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the blocks' nonew flags; temporarily permit new entries when allowed */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entry through the generic path (may land in A or B) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists past the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
853 
/*
   Zeroes both the rows and the columns given in rows[] (global numbering),
   optionally placing diag on the diagonal and adjusting b so the solution at
   those rows equals x. Collective: row ownership is resolved with a PetscSF,
   and column zeroing uses a scatter to flag ghost columns in the off-diagonal
   block.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers; after the reduction lrows[r] >= 0 marks a selected local row */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  /* build a 0/1 mask over owned columns, then scatter it to ghost positions */
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x local so b can be corrected for zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row index back to the true local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
972 
/*
   yy = A*xx for a parallel AIJ matrix. Overlaps the forward scatter of the
   ghost entries of xx with the diagonal-block product, then adds the
   off-diagonal contribution once the ghost values have arrived.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  /* start communicating ghost entries of xx while computing the local product */
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));            /* yy = A_diag * xx */
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); /* yy += B * ghost(xx) */
  PetscFunctionReturn(0);
}
988 
989 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
995   PetscFunctionReturn(0);
996 }
997 
/*
   zz = yy + A*xx for a parallel AIJ matrix. Same overlap pattern as
   MatMult_MPIAIJ: ghost communication proceeds while the diagonal block
   computes its multiply-add.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  /* start communicating ghost entries of xx */
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));      /* zz = A_diag*xx + yy */
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); /* zz += B * ghost(xx) */
  PetscFunctionReturn(0);
}
1010 
/*
   yy = A^T * xx. The off-diagonal block's transpose product is computed into
   the ghost vector first, then the local transpose product; the ghost
   contributions are finally summed back to their owning ranks with a reverse
   scatter. The B product must precede the scatter that consumes a->lvec.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1025 
/*
   Sets *f to PETSC_TRUE if Bmat equals the transpose of Amat to within tol.
   Collective: the per-rank diagonal-block test is reduced with logical AND,
   and a serial off-diagonal comparison follows only if the cheap test passes.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* *f is true only if every rank's diagonal-block test passed */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0); /* uniprocessor: diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range [first,last) */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me): the two should be transposes */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1066 
1067 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1068 {
1069   PetscFunctionBegin;
1070   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1071   PetscFunctionReturn(0);
1072 }
1073 
/*
   zz = yy + A^T * xx. Same structure as MatMultTranspose_MPIAIJ: the
   off-diagonal transpose product fills the ghost vector, the local transpose
   multiply-add runs, then the ghost contributions are summed back to their
   owners with a reverse scatter.
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1088 
/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* both checks below enforce the precondition stated above */
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with matching row/column layouts the diagonal lies entirely in the local diagonal block */
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}
1103 
1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107 
1108   PetscFunctionBegin;
1109   PetscCall(MatScale(a->A,aa));
1110   PetscCall(MatScale(a->B,aa));
1111   PetscFunctionReturn(0);
1112 }
1113 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* star forest used to communicate remotely-owned COO entries */
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* first-pass permutation and jmap arrays for the A (diagonal) and B (off-diagonal) blocks */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* second-pass imap/permutation/jmap arrays */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers (allocated together with PetscMalloc2) and send permutation */
  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
1137 
1138 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1139 {
1140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1141 
1142   PetscFunctionBegin;
1143 #if defined(PETSC_USE_LOG)
1144   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1145 #endif
1146   PetscCall(MatStashDestroy_Private(&mat->stash));
1147   PetscCall(VecDestroy(&aij->diag));
1148   PetscCall(MatDestroy(&aij->A));
1149   PetscCall(MatDestroy(&aij->B));
1150 #if defined(PETSC_USE_CTABLE)
1151   PetscCall(PetscTableDestroy(&aij->colmap));
1152 #else
1153   PetscCall(PetscFree(aij->colmap));
1154 #endif
1155   PetscCall(PetscFree(aij->garray));
1156   PetscCall(VecDestroy(&aij->lvec));
1157   PetscCall(VecScatterDestroy(&aij->Mvctx));
1158   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1159   PetscCall(PetscFree(aij->ld));
1160 
1161   /* Free COO */
1162   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1163 
1164   PetscCall(PetscFree(mat->data));
1165 
1166   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1167   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1168 
1169   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1179 #if defined(PETSC_HAVE_CUDA)
1180   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1181 #endif
1182 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1184 #endif
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1186 #if defined(PETSC_HAVE_ELEMENTAL)
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1188 #endif
1189 #if defined(PETSC_HAVE_SCALAPACK)
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1191 #endif
1192 #if defined(PETSC_HAVE_HYPRE)
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1195 #endif
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1198   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1202 #if defined(PETSC_HAVE_MKL_SPARSE)
1203   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1204 #endif
1205   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1206   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1207   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1208   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1209   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1214 {
1215   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1216   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1217   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1218   const PetscInt    *garray = aij->garray;
1219   const PetscScalar *aa,*ba;
1220   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1221   PetscInt          *rowlens;
1222   PetscInt          *colidxs;
1223   PetscScalar       *matvals;
1224 
1225   PetscFunctionBegin;
1226   PetscCall(PetscViewerSetUp(viewer));
1227 
1228   M  = mat->rmap->N;
1229   N  = mat->cmap->N;
1230   m  = mat->rmap->n;
1231   rs = mat->rmap->rstart;
1232   cs = mat->cmap->rstart;
1233   nz = A->nz + B->nz;
1234 
1235   /* write matrix header */
1236   header[0] = MAT_FILE_CLASSID;
1237   header[1] = M; header[2] = N; header[3] = nz;
1238   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1239   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1240 
1241   /* fill in and store row lengths  */
1242   PetscCall(PetscMalloc1(m,&rowlens));
1243   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1244   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1245   PetscCall(PetscFree(rowlens));
1246 
1247   /* fill in and store column indices */
1248   PetscCall(PetscMalloc1(nz,&colidxs));
1249   for (cnt=0, i=0; i<m; i++) {
1250     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1251       if (garray[B->j[jb]] > cs) break;
1252       colidxs[cnt++] = garray[B->j[jb]];
1253     }
1254     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1255       colidxs[cnt++] = A->j[ja] + cs;
1256     for (; jb<B->i[i+1]; jb++)
1257       colidxs[cnt++] = garray[B->j[jb]];
1258   }
1259   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1260   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1261   PetscCall(PetscFree(colidxs));
1262 
1263   /* fill in and store nonzero values */
1264   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1265   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1266   PetscCall(PetscMalloc1(nz,&matvals));
1267   for (cnt=0, i=0; i<m; i++) {
1268     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1269       if (garray[B->j[jb]] > cs) break;
1270       matvals[cnt++] = ba[jb];
1271     }
1272     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1273       matvals[cnt++] = aa[ja];
1274     for (; jb<B->i[i+1]; jb++)
1275       matvals[cnt++] = ba[jb];
1276   }
1277   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1278   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1279   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1280   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1281   PetscCall(PetscFree(matvals));
1282 
1283   /* write block size option to the viewer's .info file */
1284   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #include <petscdraw.h>
/*
   Views a parallel AIJ matrix on ASCII, draw, binary, or socket viewers.
   Special ASCII formats (load balance, info, info-detail) are handled
   in-place; everything else gathers the whole matrix to rank 0 and views it
   there with the sequential viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage statistics, printed synchronized so output stays ordered */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* any other ASCII format falls through to the gather-to-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable -- when iascii is true the
       first branch of this chain is taken, so the chain never reaches here.
       Confirm before removing. */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1416 
1417 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1418 {
1419   PetscBool      iascii,isdraw,issocket,isbinary;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1423   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1424   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1425   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1426   if (iascii || isdraw || isbinary || issocket) {
1427     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (and Eisenstat) are supported: each
   iteration relaxes with the local diagonal block mat->A after correcting
   the right-hand side by the off-process coupling, bb1 = bb - B*x, where
   the ghost values of x are gathered through mat->Mvctx.  True global
   parallel SOR is not implemented and raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;  /* work vector holding bb corrected by off-diagonal coupling */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate to the diagonal block, no communication */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* A work vector is needed whenever x may be nonzero when B*x is formed:
     more than one outer iteration, a nonzero initial guess, or Eisenstat.
     (~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-guess bit is NOT set.) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x == 0, so B*x == 0 and bb can be used unmodified */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x == 0, so B*x == 0 and bb can be used unmodified */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep with x == 0, so B*x == 0 and bb can be used unmodified */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of x needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    /* backward local sweep from a zero guess produces the first half-step in xx */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal of the matrix for the pointwise scaling below */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * diag(A) * xx  (Eisenstat trick rhs) */
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    /* add the off-process coupling B * (ghost values of xx) */
    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/indefinite flag detected in the local solves */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1530 
/*
   MatPermute_MPIAIJ - Forms B = P_r * A * P_c for permutation index sets rowp/colp.

   The permutations are inverted with PetscSF reductions to learn where each
   local row/column must be sent, the destination nonzero counts are computed
   for exact preallocation, and the entries are then inserted with
   MatSetValues() (communication handled by the matrix stash).

   NOTE(review): parcolp is declared and tested at the end but never assigned
   in this function body — presumably a remnant of a removed code path that
   converted a sequential colp to a parallel IS; confirm before relying on it.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  /* reduce each owned global row index onto the rank that wants it */
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols (compressed off-diagonal columns) should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros each permuted row will have, then
     broadcast those counts to the rank that will own the permuted row */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1636 
1637 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1638 {
1639   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1640 
1641   PetscFunctionBegin;
1642   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1643   if (ghosts) *ghosts = aij->garray;
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1648 {
1649   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1650   Mat            A    = mat->A,B = mat->B;
1651   PetscLogDouble isend[5],irecv[5];
1652 
1653   PetscFunctionBegin;
1654   info->block_size = 1.0;
1655   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1656 
1657   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1658   isend[3] = info->memory;  isend[4] = info->mallocs;
1659 
1660   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1661 
1662   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1663   isend[3] += info->memory;  isend[4] += info->mallocs;
1664   if (flag == MAT_LOCAL) {
1665     info->nz_used      = isend[0];
1666     info->nz_allocated = isend[1];
1667     info->nz_unneeded  = isend[2];
1668     info->memory       = isend[3];
1669     info->mallocs      = isend[4];
1670   } else if (flag == MAT_GLOBAL_MAX) {
1671     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1672 
1673     info->nz_used      = irecv[0];
1674     info->nz_allocated = irecv[1];
1675     info->nz_unneeded  = irecv[2];
1676     info->memory       = irecv[3];
1677     info->mallocs      = irecv[4];
1678   } else if (flag == MAT_GLOBAL_SUM) {
1679     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1680 
1681     info->nz_used      = irecv[0];
1682     info->nz_allocated = irecv[1];
1683     info->nz_unneeded  = irecv[2];
1684     info->memory       = irecv[3];
1685     info->mallocs      = irecv[4];
1686   }
1687   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1688   info->fill_ratio_needed = 0;
1689   info->factor_mallocs    = 0;
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1694 {
1695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1696 
1697   PetscFunctionBegin;
1698   switch (op) {
1699   case MAT_NEW_NONZERO_LOCATIONS:
1700   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1701   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1702   case MAT_KEEP_NONZERO_PATTERN:
1703   case MAT_NEW_NONZERO_LOCATION_ERR:
1704   case MAT_USE_INODES:
1705   case MAT_IGNORE_ZERO_ENTRIES:
1706   case MAT_FORM_EXPLICIT_TRANSPOSE:
1707     MatCheckPreallocated(A,1);
1708     PetscCall(MatSetOption(a->A,op,flg));
1709     PetscCall(MatSetOption(a->B,op,flg));
1710     break;
1711   case MAT_ROW_ORIENTED:
1712     MatCheckPreallocated(A,1);
1713     a->roworiented = flg;
1714 
1715     PetscCall(MatSetOption(a->A,op,flg));
1716     PetscCall(MatSetOption(a->B,op,flg));
1717     break;
1718   case MAT_FORCE_DIAGONAL_ENTRIES:
1719   case MAT_SORTED_FULL:
1720     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1721     break;
1722   case MAT_IGNORE_OFF_PROC_ENTRIES:
1723     a->donotstash = flg;
1724     break;
1725   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1726   case MAT_SPD:
1727   case MAT_SYMMETRIC:
1728   case MAT_STRUCTURALLY_SYMMETRIC:
1729   case MAT_HERMITIAN:
1730   case MAT_SYMMETRY_ETERNAL:
1731     break;
1732   case MAT_SUBMAT_SINGLEIS:
1733     A->submat_singleis = flg;
1734     break;
1735   case MAT_STRUCTURE_ONLY:
1736     /* The option is handled directly by MatSetOption() */
1737     break;
1738   default:
1739     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 
/*
   MatGetRow_MPIAIJ - Returns one locally-owned row of the matrix in global
   column numbering.

   The row is assembled by merging the corresponding rows of the diagonal
   block A (columns in [cstart,cend)) and off-diagonal block B (columns
   mapped through garray), keeping the column indices sorted.  Values and
   indices are copied into the persistent buffers mat->rowvalues and
   mat->rowindices, which remain valid until MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one outstanding MatGetRow() is allowed per matrix */
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request columns and/or values from A and B only as needed by the caller */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* B entries with global column < cstart come first, then all of A,
         then the remaining B entries (global column >= cend) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;  /* number of B entries left of the diagonal block */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1820 
1821 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1822 {
1823   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1824 
1825   PetscFunctionBegin;
1826   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1827   aij->getrowactive = PETSC_FALSE;
1828   PetscFunctionReturn(0);
1829 }
1830 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1-, or infinity-norm of an
   MPIAIJ matrix.

   Local contributions are accumulated by walking the raw CSR value arrays
   of the diagonal (A) and off-diagonal (B) blocks, then combined with an
   Allreduce (SUM for Frobenius and 1-norm columns, MAX for infinity-norm).
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all local entries of A and B, then reduce and sqrt */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column, reduce, then take the max.
         NOTE: allocates two arrays of length cmap->N, the GLOBAL column count */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps compressed off-diagonal columns to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row max then a
         MAX reduction gives the global infinity norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1900 
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place reuse) the result matrix is created
   with exact preallocation computed from the column counts of A's blocks;
   the off-process counts are obtained by reducing the off-diagonal column
   histogram through a PetscSF.  The diagonal block is transposed directly
   in place (local data only), while the off-diagonal entries are inserted
   with MatSetValues() and communicated during assembly.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    /* column counts of A's diagonal block = row counts of its transpose */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    /* reduce the per-ghost-column counts onto the owning processes */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global numbering */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* each source row i contributes a column of the transpose:
       insert it as a single-column MatSetValues() call */
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1988 
1989 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1990 {
1991   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1992   Mat            a    = aij->A,b = aij->B;
1993   PetscInt       s1,s2,s3;
1994 
1995   PetscFunctionBegin;
1996   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1997   if (rr) {
1998     PetscCall(VecGetLocalSize(rr,&s1));
1999     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2000     /* Overlap communication with computation. */
2001     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2002   }
2003   if (ll) {
2004     PetscCall(VecGetLocalSize(ll,&s1));
2005     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2006     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2007   }
2008   /* scale  the diagonal block */
2009   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2010 
2011   if (rr) {
2012     /* Do a scatter end and then right scale the off-diagonal block */
2013     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2014     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020 {
2021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(MatSetUnfactored(a->A));
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2029 {
2030   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2031   Mat            a,b,c,d;
2032   PetscBool      flg;
2033 
2034   PetscFunctionBegin;
2035   a = matA->A; b = matA->B;
2036   c = matB->A; d = matB->B;
2037 
2038   PetscCall(MatEqual(a,c,&flg));
2039   if (flg) {
2040     PetscCall(MatEqual(b,d,&flg));
2041   }
2042   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2047 {
2048   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2049   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2050 
2051   PetscFunctionBegin;
2052   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2053   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2054     /* because of the column compression in the off-processor part of the matrix a->B,
2055        the number of columns in a->B and b->B may be different, hence we cannot call
2056        the MatCopy() directly on the two parts. If need be, we can provide a more
2057        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2058        then copying the submatrices */
2059     PetscCall(MatCopy_Basic(A,B,str));
2060   } else {
2061     PetscCall(MatCopy(a->A,b->A,str));
2062     PetscCall(MatCopy(a->B,b->B,str));
2063   }
2064   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2069 {
2070   PetscFunctionBegin;
2071   PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 /*
2076    Computes the number of nonzeros per row needed for preallocation when X and Y
2077    have different nonzero structure.
2078 */
2079 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2080 {
2081   PetscInt       i,j,k,nzx,nzy;
2082 
2083   PetscFunctionBegin;
2084   /* Set the number of nonzeros in the new matrix */
2085   for (i=0; i<m; i++) {
2086     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2087     nzx = xi[i+1] - xi[i];
2088     nzy = yi[i+1] - yi[i];
2089     nnz[i] = 0;
2090     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2091       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2092       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2093       nnz[i]++;
2094     }
2095     for (; k<nzy; k++) nnz[i]++;
2096   }
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2102 {
2103   PetscInt       m = Y->rmap->N;
2104   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2105   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2106 
2107   PetscFunctionBegin;
2108   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2113 {
2114   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2115 
2116   PetscFunctionBegin;
2117   if (str == SAME_NONZERO_PATTERN) {
2118     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2119     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2120   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2121     PetscCall(MatAXPY_Basic(Y,a,X,str));
2122   } else {
2123     Mat      B;
2124     PetscInt *nnz_d,*nnz_o;
2125 
2126     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2127     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2128     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2129     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2130     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2131     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2132     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2133     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2134     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2135     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2136     PetscCall(MatHeaderMerge(Y,&B));
2137     PetscCall(PetscFree(nnz_d));
2138     PetscCall(PetscFree(nnz_o));
2139   }
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2144 
2145 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2146 {
2147   PetscFunctionBegin;
2148   if (PetscDefined(USE_COMPLEX)) {
2149     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2150 
2151     PetscCall(MatConjugate_SeqAIJ(aij->A));
2152     PetscCall(MatConjugate_SeqAIJ(aij->B));
2153   }
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2158 {
2159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2160 
2161   PetscFunctionBegin;
2162   PetscCall(MatRealPart(a->A));
2163   PetscCall(MatRealPart(a->B));
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatImaginaryPart(a->A));
2173   PetscCall(MatImaginaryPart(a->B));
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2178 {
2179   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2180   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2181   PetscScalar       *va,*vv;
2182   Vec               vB,vA;
2183   const PetscScalar *vb;
2184 
2185   PetscFunctionBegin;
2186   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2187   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2188 
2189   PetscCall(VecGetArrayWrite(vA,&va));
2190   if (idx) {
2191     for (i=0; i<m; i++) {
2192       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2193     }
2194   }
2195 
2196   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2197   PetscCall(PetscMalloc1(m,&idxb));
2198   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2199 
2200   PetscCall(VecGetArrayWrite(v,&vv));
2201   PetscCall(VecGetArrayRead(vB,&vb));
2202   for (i=0; i<m; i++) {
2203     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2204       vv[i] = vb[i];
2205       if (idx) idx[i] = a->garray[idxb[i]];
2206     } else {
2207       vv[i] = va[i];
2208       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2209         idx[i] = a->garray[idxb[i]];
2210     }
2211   }
2212   PetscCall(VecRestoreArrayWrite(vA,&vv));
2213   PetscCall(VecRestoreArrayWrite(vA,&va));
2214   PetscCall(VecRestoreArrayRead(vB,&vb));
2215   PetscCall(PetscFree(idxb));
2216   PetscCall(VecDestroy(&vA));
2217   PetscCall(VecDestroy(&vB));
2218   PetscFunctionReturn(0);
2219 }
2220 
/* MatGetRowMinAbs_MPIAIJ - For each local row, computes the entry of smallest
   absolute value, counting columns with NO stored entry as implicit 0.0 (which
   has the smallest possible magnitude and therefore wins whenever one exists).

   Output: v   - row-wise min-abs values
           idx - optional GLOBAL column index of the winning entry; ties between
                 the diagonal and off-diagonal candidates go to the smaller column. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block; delegate directly into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every row has only implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and is the min-abs candidate */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these compare the slot index j against cstart/n to skip over
           the diagonal-block column range — verify against a case with cstart > 0 */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* sweep the stored off-diagonal entries of this row, keeping the smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal candidates row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2328 
/* MatGetRowMin_MPIAIJ - For each local row, computes the minimum entry (compared
   by real part), counting columns with NO stored entry as implicit 0.0.

   Output: v   - row-wise minimum values
           idx - optional GLOBAL column index of the winning entry; ties between
                 the diagonal and off-diagonal candidates go to the smaller column. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block; delegate directly into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: identity element for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists as a candidate */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these compare the slot index j against cstart/n to skip over
           the diagonal-block column range — verify against a case with cstart > 0 */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* sweep the stored off-diagonal entries of this row, keeping the smaller real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal candidates row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2436 
/* MatGetRowMax_MPIAIJ - For each local row, computes the maximum entry (compared
   by real part), counting columns with NO stored entry as implicit 0.0.

   Output: v   - row-wise maximum values
           idx - optional GLOBAL column index of the winning entry; ties between
                 the diagonal and off-diagonal candidates go to the smaller column. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block; delegate directly into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: identity element for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these compare the slot index j against cstart/n to skip over
           the diagonal-block column range — verify against a case with cstart > 0 */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* sweep the stored off-diagonal entries of this row, keeping the larger real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine diagonal-block and off-diagonal candidates row by row */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2544 
2545 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2546 {
2547   Mat            *dummy;
2548 
2549   PetscFunctionBegin;
2550   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2551   *newmat = *dummy;
2552   PetscCall(PetscFree(dummy));
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2557 {
2558   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2559 
2560   PetscFunctionBegin;
2561   PetscCall(MatInvertBlockDiagonal(a->A,values));
2562   A->factorerrortype = a->A->factorerrortype;
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2567 {
2568   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2569 
2570   PetscFunctionBegin;
2571   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2572   PetscCall(MatSetRandom(aij->A,rctx));
2573   if (x->assembled) {
2574     PetscCall(MatSetRandom(aij->B,rctx));
2575   } else {
2576     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2577   }
2578   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2579   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2584 {
2585   PetscFunctionBegin;
2586   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2587   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 /*@
2592    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2593 
2594    Collective on Mat
2595 
2596    Input Parameters:
2597 +    A - the matrix
2598 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2599 
   Level: advanced
2601 
2602 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is registered; a no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2609 
2610 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2611 {
2612   PetscBool            sc = PETSC_FALSE,flg;
2613 
2614   PetscFunctionBegin;
2615   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2616   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2617   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2618   if (flg) {
2619     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2620   }
2621   PetscOptionsHeadEnd();
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2626 {
2627   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2628   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2629 
2630   PetscFunctionBegin;
2631   if (!Y->preallocated) {
2632     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2633   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2634     PetscInt nonew = aij->nonew;
2635     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2636     aij->nonew = nonew;
2637   }
2638   PetscCall(MatShift_Basic(Y,a));
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2643 {
2644   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2645 
2646   PetscFunctionBegin;
2647   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2648   PetscCall(MatMissingDiagonal(a->A,missing,d));
2649   if (d) {
2650     PetscInt rstart;
2651     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2652     *d += rstart;
2653 
2654   }
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2659 {
2660   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2661 
2662   PetscFunctionBegin;
2663   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 /* -------------------------------------------------------------------*/
/* Virtual-function table for MATMPIAIJ. Entries are POSITIONAL (slot numbers in
   the interleaved comments match the _MatOps struct layout); NULL means the
   operation is unimplemented for this type. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2817 
2818 /* ----------------------------------------------------------------------------------------*/
2819 
2820 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2821 {
2822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2823 
2824   PetscFunctionBegin;
2825   PetscCall(MatStoreValues(aij->A));
2826   PetscCall(MatStoreValues(aij->B));
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2831 {
2832   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2833 
2834   PetscFunctionBegin;
2835   PetscCall(MatRetrieveValues(aij->A));
2836   PetscCall(MatRetrieveValues(aij->B));
2837   PetscFunctionReturn(0);
2838 }
2839 
/* MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the diagonal block (d_nz/d_nnz)
   and the off-diagonal block (o_nz/o_nnz) of an MPIAIJ matrix. Existing column
   maps, ghost vectors, and scatters are destroyed; the matrix is marked
   preallocated but unassembled. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard stale communication structures; they are rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on one process there is no off-diagonal block, so give it zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes, so it is only created on first preallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2883 
/* MatResetPreallocation_MPIAIJ - Restores both sequential blocks to their
   originally preallocated (empty) state and discards the column map, ghost
   vector, and scatter so they are rebuilt at the next assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard stale communication structures; they are rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2910 
/* MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout and
   structure as matin, duplicating the sequential blocks per cpvalues (with or
   without copying numerical values) and deep-copying the column map, garray,
   ghost vector, and scatter when they exist. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object MatGetRow() scratch state is never shared between matrices */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the compressed-column-to-global map of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2976 
2977 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2978 {
2979   PetscBool      isbinary, ishdf5;
2980 
2981   PetscFunctionBegin;
2982   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2983   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2984   /* force binary viewer to load .info file if it has not yet done so */
2985   PetscCall(PetscViewerSetUp(viewer));
2986   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2987   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2988   if (isbinary) {
2989     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2990   } else if (ishdf5) {
2991 #if defined(PETSC_HAVE_HDF5)
2992     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2993 #else
2994     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2995 #endif
2996   } else {
2997     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2998   }
2999   PetscFunctionReturn(0);
3000 }
3001 
/* Reads an MPIAIJ matrix from a PETSc binary viewer: header (classid, M, N, nz),
   per-row nonzero counts, column indices, and values, each read collectively and
   distributed according to the matrix's row layout. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special on-disk storage format that cannot be read as MPIAIJ */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum converts the per-row lengths into CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: sum of local row lengths over all ranks must equal nz from the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3048 
/* Not scalable because of ISAllGather() unless getting all columns. */
/* Produces in *isseq a sequential IS holding the column indices of the parallel IS iscol
   gathered across all ranks. When every rank requests exactly its own column ownership
   range (detected collectively), the gather is skipped and an identity stride IS of
   global size N is returned instead. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt  start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    /* lisstride = 1 when this rank's IS coincides with its own column ownership range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* collective: the optimization applies only if it holds on every rank (MPI_MIN) */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));   /* ISAllGather() does not propagate the block size */
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
3085 
3086 /*
3087  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3088  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3089 
3090  Input Parameters:
3091    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3094    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3095            i.e., mat->cstart <= iscol[i] < mat->cend
3096  Output Parameter:
3097    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3098    iscol_o - sequential column index set for retrieving mat->B
3099    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3100  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: exclusive prefix sum of local IS sizes gives this rank's offset in iscol */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  /* mark selected columns: x carries the global column index, cmap its position in iscol */
  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; idx ownership passes to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));   /* i is reused here as the block size */
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: row indices shifted to local numbering */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  /* entries left at -1 were not selected by any rank; keep only the marked ones */
  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;   /* caller takes ownership of cmap1 and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3197 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* On MAT_INITIAL_MATRIX: builds the diagonal/off-diagonal sequential submatrices, assembles
   the parallel result, and caches isrow_d/iscol_d/iscol_o on *submat for later reuse.
   On MAT_REUSE_MATRIX: retrieves the cached index sets and updates the submatrix in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    /* skip the off-diagonal update when no off-process columns are selected */
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; note MatCreateMPIAIJWithSeqAIJ() takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* walk both sorted global-index lists, keeping only iscol_o entries that survived condensation */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3292 
/* Extracts a parallel submatrix of mat defined by isrow/iscol. Dispatches (collectively)
   to one of three implementations: SameRowColDist when both index sets match mat's
   row/column distribution, SameRowDist when only the rows match and the gathered column
   IS is sorted, and otherwise the general nonscalable path via ISAllGather(). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, which path created the matrix is recovered from the objects cached on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;   /* an empty local IS trivially lies within the ownership range */
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* collective decision: the fast paths require the property to hold on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted: fall through to the general path below, reusing iscol_local already computed */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered IS on the new matrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3396 
3397 /*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.
3400 
3401    Collective
3402 
3403    Input Parameters:
3404 +  comm - MPI communicator
3405 .  A - "diagonal" portion of matrix
3406 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3407 -  garray - global index of B columns
3408 
3409    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3412 
3413    Notes:
3414        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3415        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3416 
3417 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3418 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A passes to *mat */
  maij->A = A;

  /* translate B's column indices from local (B) numbering to global numbering via garray,
     in place, since Bnew below reuses the same oj array */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/value arrays */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* transfer array ownership from B to Bnew: B must not free the shared arrays on destroy */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3485 
3486 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3487 
3488 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3489 {
3490   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3491   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3492   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3493   Mat            M,Msub,B=a->B;
3494   MatScalar      *aa;
3495   Mat_SeqAIJ     *aij;
3496   PetscInt       *garray = a->garray,*colsub,Ncols;
3497   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3498   IS             iscol_sub,iscmap;
3499   const PetscInt *is_idx,*cmap;
3500   PetscBool      allcolumns=PETSC_FALSE;
3501   MPI_Comm       comm;
3502 
3503   PetscFunctionBegin;
3504   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3505   if (call == MAT_REUSE_MATRIX) {
3506     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3507     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3508     PetscCall(ISGetLocalSize(iscol_sub,&count));
3509 
3510     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3511     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3512 
3513     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3514     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3515 
3516     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3517 
3518   } else { /* call == MAT_INITIAL_MATRIX) */
3519     PetscBool flg;
3520 
3521     PetscCall(ISGetLocalSize(iscol,&n));
3522     PetscCall(ISGetSize(iscol,&Ncols));
3523 
3524     /* (1) iscol -> nonscalable iscol_local */
3525     /* Check for special case: each processor gets entire matrix columns */
3526     PetscCall(ISIdentity(iscol_local,&flg));
3527     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3528     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3529     if (allcolumns) {
3530       iscol_sub = iscol_local;
3531       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3532       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3533 
3534     } else {
3535       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3536       PetscInt *idx,*cmap1,k;
3537       PetscCall(PetscMalloc1(Ncols,&idx));
3538       PetscCall(PetscMalloc1(Ncols,&cmap1));
3539       PetscCall(ISGetIndices(iscol_local,&is_idx));
3540       count = 0;
3541       k     = 0;
3542       for (i=0; i<Ncols; i++) {
3543         j = is_idx[i];
3544         if (j >= cstart && j < cend) {
3545           /* diagonal part of mat */
3546           idx[count]     = j;
3547           cmap1[count++] = i; /* column index in submat */
3548         } else if (Bn) {
3549           /* off-diagonal part of mat */
3550           if (j == garray[k]) {
3551             idx[count]     = j;
3552             cmap1[count++] = i;  /* column index in submat */
3553           } else if (j > garray[k]) {
3554             while (j > garray[k] && k < Bn-1) k++;
3555             if (j == garray[k]) {
3556               idx[count]     = j;
3557               cmap1[count++] = i; /* column index in submat */
3558             }
3559           }
3560         }
3561       }
3562       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3563 
3564       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3565       PetscCall(ISGetBlockSize(iscol,&cbs));
3566       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3567 
3568       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3569     }
3570 
3571     /* (3) Create sequential Msub */
3572     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3573   }
3574 
3575   PetscCall(ISGetLocalSize(iscol_sub,&count));
3576   aij  = (Mat_SeqAIJ*)(Msub)->data;
3577   ii   = aij->i;
3578   PetscCall(ISGetIndices(iscmap,&cmap));
3579 
3580   /*
3581       m - number of local rows
3582       Ncols - number of columns (same on all processors)
3583       rstart - first row in new global matrix generated
3584   */
3585   PetscCall(MatGetSize(Msub,&m,NULL));
3586 
3587   if (call == MAT_INITIAL_MATRIX) {
3588     /* (4) Create parallel newmat */
3589     PetscMPIInt    rank,size;
3590     PetscInt       csize;
3591 
3592     PetscCallMPI(MPI_Comm_size(comm,&size));
3593     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3594 
3595     /*
3596         Determine the number of non-zeros in the diagonal and off-diagonal
3597         portions of the matrix in order to do correct preallocation
3598     */
3599 
3600     /* first get start and end of "diagonal" columns */
3601     PetscCall(ISGetLocalSize(iscol,&csize));
3602     if (csize == PETSC_DECIDE) {
3603       PetscCall(ISGetSize(isrow,&mglobal));
3604       if (mglobal == Ncols) { /* square matrix */
3605         nlocal = m;
3606       } else {
3607         nlocal = Ncols/size + ((Ncols % size) > rank);
3608       }
3609     } else {
3610       nlocal = csize;
3611     }
3612     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3613     rstart = rend - nlocal;
3614     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3615 
3616     /* next, compute all the lengths */
3617     jj    = aij->j;
3618     PetscCall(PetscMalloc1(2*m+1,&dlens));
3619     olens = dlens + m;
3620     for (i=0; i<m; i++) {
3621       jend = ii[i+1] - ii[i];
3622       olen = 0;
3623       dlen = 0;
3624       for (j=0; j<jend; j++) {
3625         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3626         else dlen++;
3627         jj++;
3628       }
3629       olens[i] = olen;
3630       dlens[i] = dlen;
3631     }
3632 
3633     PetscCall(ISGetBlockSize(isrow,&bs));
3634     PetscCall(ISGetBlockSize(iscol,&cbs));
3635 
3636     PetscCall(MatCreate(comm,&M));
3637     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3638     PetscCall(MatSetBlockSizes(M,bs,cbs));
3639     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3640     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3641     PetscCall(PetscFree(dlens));
3642 
3643   } else { /* call == MAT_REUSE_MATRIX */
3644     M    = *newmat;
3645     PetscCall(MatGetLocalSize(M,&i,NULL));
3646     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3647     PetscCall(MatZeroEntries(M));
3648     /*
3649          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3650        rather than the slower MatSetValues().
3651     */
3652     M->was_assembled = PETSC_TRUE;
3653     M->assembled     = PETSC_FALSE;
3654   }
3655 
3656   /* (5) Set values of Msub to *newmat */
3657   PetscCall(PetscMalloc1(count,&colsub));
3658   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3659 
3660   jj   = aij->j;
3661   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3662   for (i=0; i<m; i++) {
3663     row = rstart + i;
3664     nz  = ii[i+1] - ii[i];
3665     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3666     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3667     jj += nz; aa += nz;
3668   }
3669   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3670   PetscCall(ISRestoreIndices(iscmap,&cmap));
3671 
3672   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3673   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3674 
3675   PetscCall(PetscFree(colsub));
3676 
3677   /* save Msub, iscol_sub and iscmap used in processor for next request */
3678   if (call == MAT_INITIAL_MATRIX) {
3679     *newmat = M;
3680     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3681     PetscCall(MatDestroy(&Msub));
3682 
3683     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3684     PetscCall(ISDestroy(&iscol_sub));
3685 
3686     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3687     PetscCall(ISDestroy(&iscmap));
3688 
3689     if (iscol_local) {
3690       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3691       PetscCall(ISDestroy(&iscol_local));
3692     }
3693   }
3694   PetscFunctionReturn(0);
3695 }
3696 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  holding the local rows, and then the end result obtained by concatenating
  the local matrices. Writing it directly would be much like
  MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
/*
   Extract the parallel submatrix mat[isrow,iscol] as a new distributed matrix.

   Each process first obtains its local rows of the submatrix as a sequential
   matrix (Mreuse) via MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then the
   rows are inserted into the parallel result M.

   csize is the requested number of local columns of the result, or PETSC_DECIDE.
   With MAT_REUSE_MATRIX, *newmat must come from a previous MAT_INITIAL_MATRIX
   call of this routine (the sequential submatrix is cached on it).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the flag must agree on all ranks, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible: first n%size ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts yields this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* classify each entry of row i as diagonal-block or off-diagonal */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert the rows of the sequential submatrix into the parallel matrix;
     cwork/vwork walk the CSR column/value arrays row by row */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* composing Mreuse on M keeps a reference alive past the destroy below */
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3828 
3829 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3830 {
3831   PetscInt       m,cstart, cend,j,nnz,i,d;
3832   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3833   const PetscInt *JJ;
3834   PetscBool      nooffprocentries;
3835 
3836   PetscFunctionBegin;
3837   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3838 
3839   PetscCall(PetscLayoutSetUp(B->rmap));
3840   PetscCall(PetscLayoutSetUp(B->cmap));
3841   m      = B->rmap->n;
3842   cstart = B->cmap->rstart;
3843   cend   = B->cmap->rend;
3844   rstart = B->rmap->rstart;
3845 
3846   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3847 
3848   if (PetscDefined(USE_DEBUG)) {
3849     for (i=0; i<m; i++) {
3850       nnz = Ii[i+1]- Ii[i];
3851       JJ  = J + Ii[i];
3852       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3853       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3854       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3855     }
3856   }
3857 
3858   for (i=0; i<m; i++) {
3859     nnz     = Ii[i+1]- Ii[i];
3860     JJ      = J + Ii[i];
3861     nnz_max = PetscMax(nnz_max,nnz);
3862     d       = 0;
3863     for (j=0; j<nnz; j++) {
3864       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3865     }
3866     d_nnz[i] = d;
3867     o_nnz[i] = nnz - d;
3868   }
3869   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3870   PetscCall(PetscFree2(d_nnz,o_nnz));
3871 
3872   for (i=0; i<m; i++) {
3873     ii   = i + rstart;
3874     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3875   }
3876   nooffprocentries    = B->nooffprocentries;
3877   B->nooffprocentries = PETSC_TRUE;
3878   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3879   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3880   B->nooffprocentries = nooffprocentries;
3881 
3882   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3883   PetscFunctionReturn(0);
3884 }
3885 
3886 /*@
3887    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3888    (the default parallel PETSc format).
3889 
3890    Collective
3891 
3892    Input Parameters:
3893 +  B - the matrix
3894 .  i - the indices into j for the start of each local row (starts with zero)
3895 .  j - the column indices for each local row (starts with zero)
3896 -  v - optional values in the matrix
3897 
3898    Level: developer
3899 
3900    Notes:
3901        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3902      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3903      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3904 
3905        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3906 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3909     as shown
3910 
3911 $        1 0 0
3912 $        2 0 3     P0
3913 $       -------
3914 $        4 5 6     P1
3915 $
3916 $     Process0 [P0]: rows_owned=[0,1]
3917 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3918 $        j =  {0,0,2}  [size = 3]
3919 $        v =  {1,2,3}  [size = 3]
3920 $
3921 $     Process1 [P1]: rows_owned=[2]
3922 $        i =  {0,3}    [size = nrow+1  = 1+1]
3923 $        j =  {0,1,2}  [size = 3]
3924 $        v =  {4,5,6}  [size = 3]
3925 
3926 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3927           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3928 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered as
     "MatMPIAIJSetPreallocationCSR_C"; PetscTryMethod() is a no-op if the
     matrix type does not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3935 
3936 /*@C
3937    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3938    (the default parallel PETSc format).  For good matrix assembly performance
3939    the user should preallocate the matrix storage by setting the parameters
3940    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3941    performance can be increased by more than a factor of 50.
3942 
3943    Collective
3944 
3945    Input Parameters:
3946 +  B - the matrix
3947 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3948            (same value is used for all local rows)
3949 .  d_nnz - array containing the number of nonzeros in the various rows of the
3950            DIAGONAL portion of the local submatrix (possibly different for each row)
3951            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3952            The size of this array is equal to the number of local rows, i.e 'm'.
3953            For matrices that will be factored, you must leave room for (and set)
3954            the diagonal entry even if it is zero.
3955 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3956            submatrix (same value is used for all local rows).
3957 -  o_nnz - array containing the number of nonzeros in the various rows of the
3958            OFF-DIAGONAL portion of the local submatrix (possibly different for
3959            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3960            structure. The size of this array is equal to the number
3961            of local rows, i.e 'm'.
3962 
3963    If the *_nnz parameter is given then the *_nz parameter is ignored
3964 
3965    The AIJ format (also called the Yale sparse matrix format or
3966    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3967    storage.  The stored row and column indices begin with zero.
3968    See Users-Manual: ch_mat for details.
3969 
3970    The parallel matrix is partitioned such that the first m0 rows belong to
3971    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3972    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3973 
3974    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
3979    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3980    common case of a square matrix, the row and column ranges are the same and
3981    the DIAGONAL part is also square. The remaining portion of the local
3982    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3983 
3984    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3985 
3986    You can call MatGetInfo() to get information on how effective the preallocation was;
3987    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3988    You can also run with the option -info and look for messages with the string
3989    malloc in them to see if additional memory allocation was needed.
3990 
3991    Example usage:
3992 
3993    Consider the following 8x8 matrix with 34 non-zero values, that is
3994    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3995    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3996    as follows:
3997 
3998 .vb
3999             1  2  0  |  0  3  0  |  0  4
4000     Proc0   0  5  6  |  7  0  0  |  8  0
4001             9  0 10  | 11  0  0  | 12  0
4002     -------------------------------------
4003            13  0 14  | 15 16 17  |  0  0
4004     Proc1   0 18  0  | 19 20 21  |  0  0
4005             0  0  0  | 22 23  0  | 24  0
4006     -------------------------------------
4007     Proc2  25 26 27  |  0  0 28  | 29  0
4008            30  0  0  | 31 32 33  |  0 34
4009 .ve
4010 
4011    This can be represented as a collection of submatrices as:
4012 
4013 .vb
4014       A B C
4015       D E F
4016       G H I
4017 .ve
4018 
4019    Where the submatrices A,B,C are owned by proc0, D,E,F are
4020    owned by proc1, G,H,I are owned by proc2.
4021 
4022    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4023    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4024    The 'M','N' parameters are 8,8, and have the same values on all procs.
4025 
4026    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4027    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4028    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4029    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4030    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4032 
4033    When d_nz, o_nz parameters are specified, d_nz storage elements are
4034    allocated for every row of the local diagonal submatrix, and o_nz
4035    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4037    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4038    In this case, the values of d_nz,o_nz are:
4039 .vb
4040      proc0 : dnz = 2, o_nz = 2
4041      proc1 : dnz = 3, o_nz = 2
4042      proc2 : dnz = 1, o_nz = 4
4043 .ve
4044    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4045    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4047    34 values.
4048 
4049    When d_nnz, o_nnz parameters are specified, the storage is specified
4050    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4051    In the above case the values for d_nnz,o_nnz are:
4052 .vb
4053      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4054      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4055      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4056 .ve
4057    Here the space allocated is sum of all the above values i.e 34, and
4058    hence pre-allocation is perfect.
4059 
4060    Level: intermediate
4061 
4062 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4063           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4064 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation registered as
     "MatMPIAIJSetPreallocation_C"; a no-op for types that do not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4073 
4074 /*@
4075      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4076          CSR format for the local rows.
4077 
4078    Collective
4079 
4080    Input Parameters:
4081 +  comm - MPI communicator
4082 .  m - number of local rows (Cannot be PETSC_DECIDE)
4083 .  n - This value should be the same as the local size used in creating the
4084        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4085        calculated if N is given) For square matrices n is almost always m.
4086 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4087 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4088 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4089 .   j - column indices
4090 -   a - matrix values
4091 
4092    Output Parameter:
4093 .   mat - the matrix
4094 
4095    Level: intermediate
4096 
4097    Notes:
4098        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4099      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4100      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4101 
4102        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4103 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4106     as shown
4107 
       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()
4109 
4110 $        1 0 0
4111 $        2 0 3     P0
4112 $       -------
4113 $        4 5 6     P1
4114 $
4115 $     Process0 [P0]: rows_owned=[0,1]
4116 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4117 $        j =  {0,0,2}  [size = 3]
4118 $        v =  {1,2,3}  [size = 3]
4119 $
4120 $     Process1 [P1]: rows_owned=[2]
4121 $        i =  {0,3}    [size = nrow+1  = 1+1]
4122 $        j =  {0,1,2}  [size = 3]
4123 $        v =  {4,5,6}  [size = 3]
4124 
4125 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4126           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4127 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* i == NULL passes this check vacuously (presumably for ranks with no local
     rows — confirm with callers); when given, the row offsets must start at 0 */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ /* NOTE(review): block sizes are not propagated here — confirm whether callers need them */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* copies the CSR arrays into the matrix; the caller keeps ownership of i,j,a */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4140 
4141 /*@
4142      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4143          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4144 
4145    Collective
4146 
4147    Input Parameters:
4148 +  mat - the matrix
4149 .  m - number of local rows (Cannot be PETSC_DECIDE)
4150 .  n - This value should be the same as the local size used in creating the
4151        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4152        calculated if N is given) For square matrices n is almost always m.
4153 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4154 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4155 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4156 .  J - column indices
4157 -  v - matrix values
4158 
4159    Level: intermediate
4160 
4161 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4162           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4163 @*/
4164 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4165 {
4166   PetscInt       cstart,nnz,i,j;
4167   PetscInt       *ld;
4168   PetscBool      nooffprocentries;
4169   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4170   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4171   PetscScalar    *ad,*ao;
4172   const PetscInt *Adi = Ad->i;
4173   PetscInt       ldi,Iii,md;
4174 
4175   PetscFunctionBegin;
4176   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4177   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4178   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4179   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4180 
4181   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4182   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4183   cstart = mat->cmap->rstart;
4184   if (!Aij->ld) {
4185     /* count number of entries below block diagonal */
4186     PetscCall(PetscCalloc1(m,&ld));
4187     Aij->ld = ld;
4188     for (i=0; i<m; i++) {
4189       nnz  = Ii[i+1]- Ii[i];
4190       j     = 0;
4191       while  (J[j] < cstart && j < nnz) {j++;}
4192       J    += nnz;
4193       ld[i] = j;
4194     }
4195   } else {
4196     ld = Aij->ld;
4197   }
4198 
4199   for (i=0; i<m; i++) {
4200     nnz  = Ii[i+1]- Ii[i];
4201     Iii  = Ii[i];
4202     ldi  = ld[i];
4203     md   = Adi[i+1]-Adi[i];
4204     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4205     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4206     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4207     ad  += md;
4208     ao  += nnz - md;
4209   }
4210   nooffprocentries      = mat->nooffprocentries;
4211   mat->nooffprocentries = PETSC_TRUE;
4212   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4213   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4214   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4215   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4216   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4217   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4218   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4219   mat->nooffprocentries = nooffprocentries;
4220   PetscFunctionReturn(0);
4221 }
4222 
4223 /*@C
4224    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4225    (the default parallel PETSc format).  For good matrix assembly performance
4226    the user should preallocate the matrix storage by setting the parameters
4227    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4228    performance can be increased by more than a factor of 50.
4229 
4230    Collective
4231 
4232    Input Parameters:
4233 +  comm - MPI communicator
4234 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4235            This value should be the same as the local size used in creating the
4236            y vector for the matrix-vector product y = Ax.
4237 .  n - This value should be the same as the local size used in creating the
4238        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4239        calculated if N is given) For square matrices n is almost always m.
4240 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4241 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4242 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4243            (same value is used for all local rows)
4244 .  d_nnz - array containing the number of nonzeros in the various rows of the
4245            DIAGONAL portion of the local submatrix (possibly different for each row)
4246            or NULL, if d_nz is used to specify the nonzero structure.
4247            The size of this array is equal to the number of local rows, i.e 'm'.
4248 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4249            submatrix (same value is used for all local rows).
4250 -  o_nnz - array containing the number of nonzeros in the various rows of the
4251            OFF-DIAGONAL portion of the local submatrix (possibly different for
4252            each row) or NULL, if o_nz is used to specify the nonzero
4253            structure. The size of this array is equal to the number
4254            of local rows, i.e 'm'.
4255 
4256    Output Parameter:
4257 .  A - the matrix
4258 
4259    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4260    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4261    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4262 
4263    Notes:
4264    If the *_nnz parameter is given then the *_nz parameter is ignored
4265 
4266    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4267    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4268    storage requirements for this matrix.
4269 
4270    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4271    processor than it must be used on all processors that share the object for
4272    that argument.
4273 
4274    The user MUST specify either the local or global matrix dimensions
4275    (possibly both).
4276 
4277    The parallel matrix is partitioned across processors such that the
4278    first m0 rows belong to process 0, the next m1 rows belong to
4279    process 1, the next m2 rows belong to process 2 etc.. where
4280    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4281    values corresponding to [m x N] submatrix.
4282 
4283    The columns are logically partitioned with the n0 columns belonging
4284    to 0th partition, the next n1 columns belonging to the next
4285    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4286 
4287    The DIAGONAL portion of the local submatrix on any given processor
4288    is the submatrix corresponding to the rows and columns m,n
4289    corresponding to the given processor. i.e diagonal matrix on
4290    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4291    etc. The remaining portion of the local submatrix [m x (N-n)]
4292    constitute the OFF-DIAGONAL portion. The example below better
4293    illustrates this concept.
4294 
4295    For a square global matrix we define each processor's diagonal portion
4296    to be its local rows and the corresponding columns (a square submatrix);
4297    each processor's off-diagonal portion encompasses the remainder of the
4298    local matrix (a rectangular submatrix).
4299 
4300    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4301 
4302    When calling this routine with a single process communicator, a matrix of
4303    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4304    type of communicator, use the construction mechanism
4305 .vb
4306      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4307 .ve
4308 
4309 $     MatCreate(...,&A);
4310 $     MatSetType(A,MATMPIAIJ);
4311 $     MatSetSizes(A, m,n,M,N);
4312 $     MatMPIAIJSetPreallocation(A,...);
4313 
4314    By default, this format uses inodes (identical nodes) when possible.
4315    We search for consecutive rows with the same nonzero structure, thereby
4316    reusing matrix information to achieve increased efficiency.
4317 
4318    Options Database Keys:
4319 +  -mat_no_inode  - Do not use inodes
4320 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4321 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4322         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4323         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4324 
4325    Example usage:
4326 
4327    Consider the following 8x8 matrix with 34 non-zero values, that is
4328    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4329    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4330    as follows
4331 
4332 .vb
4333             1  2  0  |  0  3  0  |  0  4
4334     Proc0   0  5  6  |  7  0  0  |  8  0
4335             9  0 10  | 11  0  0  | 12  0
4336     -------------------------------------
4337            13  0 14  | 15 16 17  |  0  0
4338     Proc1   0 18  0  | 19 20 21  |  0  0
4339             0  0  0  | 22 23  0  | 24  0
4340     -------------------------------------
4341     Proc2  25 26 27  |  0  0 28  | 29  0
4342            30  0  0  | 31 32 33  |  0 34
4343 .ve
4344 
4345    This can be represented as a collection of submatrices as
4346 
4347 .vb
4348       A B C
4349       D E F
4350       G H I
4351 .ve
4352 
4353    Where the submatrices A,B,C are owned by proc0, D,E,F are
4354    owned by proc1, G,H,I are owned by proc2.
4355 
4356    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4357    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4358    The 'M','N' parameters are 8,8, and have the same values on all procs.
4359 
4360    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4361    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4362    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4363    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4366 
4367    When d_nz, o_nz parameters are specified, d_nz storage elements are
4368    allocated for every row of the local diagonal submatrix, and o_nz
4369    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4371    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4372    In this case, the values of d_nz,o_nz are
4373 .vb
4374      proc0 : dnz = 2, o_nz = 2
4375      proc1 : dnz = 3, o_nz = 2
4376      proc2 : dnz = 1, o_nz = 4
4377 .ve
4378    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4379    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4381    34 values.
4382 
4383    When d_nnz, o_nnz parameters are specified, the storage is specified
4384    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4385    In the above case the values for d_nnz,o_nnz are
4386 .vb
4387      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4388      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4389      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4390 .ve
4391    Here the space allocated is sum of all the above values i.e 34, and
4392    hence pre-allocation is perfect.
4393 
4394    Level: intermediate
4395 
4396 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4397           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4398 @*/
4399 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4400 {
4401   PetscMPIInt    size;
4402 
4403   PetscFunctionBegin;
4404   PetscCall(MatCreate(comm,A));
4405   PetscCall(MatSetSizes(*A,m,n,M,N));
4406   PetscCallMPI(MPI_Comm_size(comm,&size));
4407   if (size > 1) {
4408     PetscCall(MatSetType(*A,MATMPIAIJ));
4409     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4410   } else {
4411     PetscCall(MatSetType(*A,MATSEQAIJ));
4412     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4413   }
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 /*@C
4418   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4419 
4420   Not collective
4421 
4422   Input Parameter:
4423 . A - The MPIAIJ matrix
4424 
4425   Output Parameters:
4426 + Ad - The local diagonal block as a SeqAIJ matrix
4427 . Ao - The local off-diagonal block as a SeqAIJ matrix
4428 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4429 
4430   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4431   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4432   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4433   local column numbers to global column numbers in the original matrix.
4434 
4435   Level: intermediate
4436 
4437 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4438 @*/
4439 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4440 {
4441   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4442   PetscBool      flg;
4443 
4444   PetscFunctionBegin;
4445   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4446   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4447   if (Ad)     *Ad     = a->A;
4448   if (Ao)     *Ao     = a->B;
4449   if (colmap) *colmap = a->garray;
4450   PetscFunctionReturn(0);
4451 }
4452 
/* Concatenates the rows of the per-process sequential matrices inmat into a single
   parallel matrix *outmat on comm; n is the number of local columns (or PETSC_DECIDE).
   With MAT_INITIAL_MATRIX the output matrix is created, typed and preallocated;
   with MAT_REUSE_MATRIX only the numerical values are re-inserted into *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* exclusive prefix sum of local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros of each local row for preallocation */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocation calls are issued; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the corresponding global row of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4506 
4507 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4508 {
4509   PetscMPIInt       rank;
4510   PetscInt          m,N,i,rstart,nnz;
4511   size_t            len;
4512   const PetscInt    *indx;
4513   PetscViewer       out;
4514   char              *name;
4515   Mat               B;
4516   const PetscScalar *values;
4517 
4518   PetscFunctionBegin;
4519   PetscCall(MatGetLocalSize(A,&m,NULL));
4520   PetscCall(MatGetSize(A,NULL,&N));
4521   /* Should this be the type of the diagonal block of A? */
4522   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4523   PetscCall(MatSetSizes(B,m,N,m,N));
4524   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4525   PetscCall(MatSetType(B,MATSEQAIJ));
4526   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4527   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4528   for (i=0; i<m; i++) {
4529     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4530     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4531     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4532   }
4533   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4534   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4535 
4536   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4537   PetscCall(PetscStrlen(outfile,&len));
4538   PetscCall(PetscMalloc1(len+6,&name));
4539   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4540   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4541   PetscCall(PetscFree(name));
4542   PetscCall(MatView(B,out));
4543   PetscCall(PetscViewerDestroy(&out));
4544   PetscCall(MatDestroy(&B));
4545   PetscFunctionReturn(0);
4546 }
4547 
/* Container destructor for the Mat_Merge_SeqsToMPI context attached to the
   parallel matrix by MatCreateMPIAIJSumSeqAIJSymbolic(); releases all merge
   buffers and the row layout. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj: the contiguous payload (slot 0) must be freed before the pointer array itself */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  /* coi/coj/owners_co are set to NULL by the symbolic phase; PetscFree(NULL) is a no-op */
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4570 
4571 #include <../src/mat/utils/freespace.h>
4572 #include <petscbt.h>
4573 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat (previously created by
   MatCreateMPIAIJSumSeqAIJSymbolic()) with values obtained by summing the rows of
   the per-process sequential matrices seqmat. Rows of seqmat owned by other
   processes are shipped to their owners; each owner then accumulates its own
   seqmat rows plus all received rows into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge context stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  /* merged row structure and received ij-structures computed in the symbolic phase */
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    /* the values of the rows owned by [proc] are contiguous in aa starting at ai[i] */
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge-scan: both bj_i and aj are sorted, so advance j until columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4693 
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure
   of the parallel sum of the per-process sequential matrices seqmat and creates a
   preallocated but unassembled MATMPIAIJ matrix *mpimat with m local rows and n
   local columns (either may be PETSC_DECIDE). The merge context needed by the
   numeric phase is attached to *mpimat in a PetscContainer under the key
   "MatMergeSeqsToMPI" and freed by MatDestroy_MPIAIJ_SeqsToMPI(). */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* nothing is sent to self */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows; the i-structure message is sized from them */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    /* column indices of the rows owned by [proc] are contiguous in aj starting at ai[i] */
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list for merging sorted column indices */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  /* flatten the free-space chain into the final merged column-index array bj */
  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4943 
4944 /*@C
4945       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4946                  matrices from each processor
4947 
4948     Collective
4949 
4950    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4952 .    seqmat - the input sequential matrices
4953 .    m - number of local rows (or PETSC_DECIDE)
4954 .    n - number of local columns (or PETSC_DECIDE)
4955 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4956 
4957    Output Parameter:
4958 .    mpimat - the parallel matrix generated
4959 
4960     Level: advanced
4961 
4962    Notes:
4963      The dimensions of the sequential matrix in each processor MUST be the same.
4964      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4965      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4966 @*/
4967 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4968 {
4969   PetscMPIInt    size;
4970 
4971   PetscFunctionBegin;
4972   PetscCallMPI(MPI_Comm_size(comm,&size));
4973   if (size == 1) {
4974     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4975     if (scall == MAT_INITIAL_MATRIX) {
4976       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4977     } else {
4978       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4979     }
4980     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4981     PetscFunctionReturn(0);
4982   }
4983   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4984   if (scall == MAT_INITIAL_MATRIX) {
4985     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4986   }
4987   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4988   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4989   PetscFunctionReturn(0);
4990 }
4991 
4992 /*@
4993      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4994           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4995           with MatGetSize()
4996 
4997     Not Collective
4998 
   Input Parameter:
.    A - the matrix
5002 
5003    Output Parameter:
5004 .    A_loc - the local sequential matrix generated
5005 
5006     Level: developer
5007 
5008    Notes:
5009      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5010 
5011      Destroy the matrix with MatDestroy()
5012 
.seealso: `MatMPIAIJGetLocalMat()`
5014 
5015 @*/
5016 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5017 {
5018   PetscBool      mpi;
5019 
5020   PetscFunctionBegin;
5021   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5022   if (mpi) {
5023     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5024   } else {
5025     *A_loc = A;
5026     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5027   }
5028   PetscFunctionReturn(0);
5029 }
5030 
5031 /*@
5032      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5033           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5034           with MatGetSize()
5035 
5036     Not Collective
5037 
5038    Input Parameters:
5039 +    A - the matrix
5040 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5041 
5042    Output Parameter:
5043 .    A_loc - the local sequential matrix generated
5044 
5045     Level: developer
5046 
5047    Notes:
5048      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5049 
5050      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5051      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5052      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5053      modify the values of the returned A_loc.
5054 
5055 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5056 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray; /* cmap: local-to-global map for off-diagonal columns */
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Accept any type whose name begins with "mpiaij" (covers device subclasses) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Uniprocess case: the diagonal block A already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  /* aa/ba are walking copies; aav/bav are kept for the Restore calls below */
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row counts of the merged matrix: diagonal + off-diagonal nonzeros per row */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global column < cstart (left of diagonal block);
         b->j is sorted, so we can stop at the first mapped column >= cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A; shift local column indices to global */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Sparsity pattern is unchanged: only refill the value array in merged order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns left of the diagonal block) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5157 
5158 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5161 
5162     Not Collective
5163 
5164    Input Parameters:
5165 +    A - the matrix
5166 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5167 
5168    Output Parameters:
5169 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5170 -    A_loc - the local sequential matrix generated
5171 
5172     Level: developer
5173 
5174    Notes:
5175      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5176 
5177 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5178 
5179 @*/
5180 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5181 {
5182   Mat            Ao,Ad;
5183   const PetscInt *cmap;
5184   PetscMPIInt    size;
5185   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5186 
5187   PetscFunctionBegin;
5188   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5189   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5190   if (size == 1) {
5191     if (scall == MAT_INITIAL_MATRIX) {
5192       PetscCall(PetscObjectReference((PetscObject)Ad));
5193       *A_loc = Ad;
5194     } else if (scall == MAT_REUSE_MATRIX) {
5195       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5196     }
5197     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5198     PetscFunctionReturn(0);
5199   }
5200   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5201   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5202   if (f) {
5203     PetscCall((*f)(A,scall,glob,A_loc));
5204   } else {
5205     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5206     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5207     Mat_SeqAIJ        *c;
5208     PetscInt          *ai = a->i, *aj = a->j;
5209     PetscInt          *bi = b->i, *bj = b->j;
5210     PetscInt          *ci,*cj;
5211     const PetscScalar *aa,*ba;
5212     PetscScalar       *ca;
5213     PetscInt          i,j,am,dn,on;
5214 
5215     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5216     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5217     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5218     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5219     if (scall == MAT_INITIAL_MATRIX) {
5220       PetscInt k;
5221       PetscCall(PetscMalloc1(1+am,&ci));
5222       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5223       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5224       ci[0] = 0;
5225       for (i=0,k=0; i<am; i++) {
5226         const PetscInt ncols_o = bi[i+1] - bi[i];
5227         const PetscInt ncols_d = ai[i+1] - ai[i];
5228         ci[i+1] = ci[i] + ncols_o + ncols_d;
5229         /* diagonal portion of A */
5230         for (j=0; j<ncols_d; j++,k++) {
5231           cj[k] = *aj++;
5232           ca[k] = *aa++;
5233         }
5234         /* off-diagonal portion of A */
5235         for (j=0; j<ncols_o; j++,k++) {
5236           cj[k] = dn + *bj++;
5237           ca[k] = *ba++;
5238         }
5239       }
5240       /* put together the new matrix */
5241       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5242       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5243       /* Since these are PETSc arrays, change flags to free them as necessary. */
5244       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5245       c->free_a  = PETSC_TRUE;
5246       c->free_ij = PETSC_TRUE;
5247       c->nonew   = 0;
5248       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5249     } else if (scall == MAT_REUSE_MATRIX) {
5250       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5251       for (i=0; i<am; i++) {
5252         const PetscInt ncols_d = ai[i+1] - ai[i];
5253         const PetscInt ncols_o = bi[i+1] - bi[i];
5254         /* diagonal portion of A */
5255         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5256         /* off-diagonal portion of A */
5257         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5258       }
5259       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5260     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5261     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5262     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5263     if (glob) {
5264       PetscInt cst, *gidx;
5265 
5266       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5267       PetscCall(PetscMalloc1(dn+on,&gidx));
5268       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5269       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5270       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5271     }
5272   }
5273   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5274   PetscFunctionReturn(0);
5275 }
5276 
5277 /*@C
5278      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5279 
5280     Not Collective
5281 
5282    Input Parameters:
5283 +    A - the matrix
5284 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5285 -    row, col - index sets of rows and columns to extract (or NULL)
5286 
5287    Output Parameter:
5288 .    A_loc - the local sequential matrix generated
5289 
5290     Level: developer
5291 
5292 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5293 
5294 @*/
5295 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5296 {
5297   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5298   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5299   IS             isrowa,iscola;
5300   Mat            *aloc;
5301   PetscBool      match;
5302 
5303   PetscFunctionBegin;
5304   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5305   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5306   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5307   if (!row) {
5308     start = A->rmap->rstart; end = A->rmap->rend;
5309     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5310   } else {
5311     isrowa = *row;
5312   }
5313   if (!col) {
5314     start = A->cmap->rstart;
5315     cmap  = a->garray;
5316     nzA   = a->A->cmap->n;
5317     nzB   = a->B->cmap->n;
5318     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5319     ncols = 0;
5320     for (i=0; i<nzB; i++) {
5321       if (cmap[i] < start) idx[ncols++] = cmap[i];
5322       else break;
5323     }
5324     imark = i;
5325     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5326     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5327     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5328   } else {
5329     iscola = *col;
5330   }
5331   if (scall != MAT_INITIAL_MATRIX) {
5332     PetscCall(PetscMalloc1(1,&aloc));
5333     aloc[0] = *A_loc;
5334   }
5335   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5336   if (!col) { /* attach global id of condensed columns */
5337     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5338   }
5339   *A_loc = aloc[0];
5340   PetscCall(PetscFree(aloc));
5341   if (!row) {
5342     PetscCall(ISDestroy(&isrowa));
5343   }
5344   if (!col) {
5345     PetscCall(ISDestroy(&iscola));
5346   }
5347   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5348   PetscFunctionReturn(0);
5349 }
5350 
5351 /*
5352  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5353  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5354  * on a global size.
5355  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* Per root row, record the (diag,off-diag) nonzero counts interleaved in
   * pairs so a single MPIU_2INT broadcast can ship both at once */
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we have the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  /* Total row lengths for preallocation; ncol tracks the widest row */
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build per-entry SF graphs: one leaf per nonzero, split into diag and
   * off-diag graphs because their root arrays live in different matrices */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ, so ilocal needs to point into the single contiguous arrays */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (shifted in place, undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Undo the in-place globalization of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5524 
5525 /*
5526  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5527  * This supports MPIAIJ and MAIJ
5528  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dividing by dof collapses the
     * dof columns of a MAIJ matrix onto one key */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted) */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5602 
5603 /*@C
5604   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5605 
5606   Collective on Mat
5607 
5608   Input Parameters:
5609 + A - the first matrix in mpiaij format
5610 . B - the second matrix in mpiaij format
5611 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5612 
5613   Output Parameters:
5614 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5615 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5616 - B_seq - the sequential matrix generated
5617 
5618   Level: developer
5619 
5620 @*/
5621 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5622 {
5623   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5624   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5625   IS             isrowb,iscolb;
5626   Mat            *bseq=NULL;
5627 
5628   PetscFunctionBegin;
5629   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5630     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5631   }
5632   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5633 
5634   if (scall == MAT_INITIAL_MATRIX) {
5635     start = A->cmap->rstart;
5636     cmap  = a->garray;
5637     nzA   = a->A->cmap->n;
5638     nzB   = a->B->cmap->n;
5639     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5640     ncols = 0;
5641     for (i=0; i<nzB; i++) {  /* row < local row index */
5642       if (cmap[i] < start) idx[ncols++] = cmap[i];
5643       else break;
5644     }
5645     imark = i;
5646     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5647     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5648     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5649     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5650   } else {
5651     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5652     isrowb  = *rowb; iscolb = *colb;
5653     PetscCall(PetscMalloc1(1,&bseq));
5654     bseq[0] = *B_seq;
5655   }
5656   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5657   *B_seq = bseq[0];
5658   PetscCall(PetscFree(bseq));
5659   if (!rowb) {
5660     PetscCall(ISDestroy(&isrowb));
5661   } else {
5662     *rowb = isrowb;
5663   }
5664   if (!colb) {
5665     PetscCall(ISDestroy(&iscolb));
5666   } else {
5667     *colb = iscolb;
5668   }
5669   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5670   PetscFunctionReturn(0);
5671 }
5672 
5673 /*
5674     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5675     of the OFF-DIAGONAL portion of local A
5676 
5677     Collective on Mat
5678 
5679    Input Parameters:
5680 +    A,B - the matrices in mpiaij format
5681 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5682 
5683    Output Parameter:
5684 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5685 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5686 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5687 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5688 
    Developer Note: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5691 
5692     Level: developer
5693 
5694 */
5695 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5696 {
5697   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5698   Mat_SeqAIJ             *b_oth;
5699   VecScatter             ctx;
5700   MPI_Comm               comm;
5701   const PetscMPIInt      *rprocs,*sprocs;
5702   const PetscInt         *srow,*rstarts,*sstarts;
5703   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5704   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5705   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5706   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5707   PetscMPIInt            size,tag,rank,nreqs;
5708 
5709   PetscFunctionBegin;
5710   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5711   PetscCallMPI(MPI_Comm_size(comm,&size));
5712 
5713   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5714     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5715   }
5716   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5717   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5718 
5719   if (size == 1) {
5720     startsj_s = NULL;
5721     bufa_ptr  = NULL;
5722     *B_oth    = NULL;
5723     PetscFunctionReturn(0);
5724   }
5725 
5726   ctx = a->Mvctx;
5727   tag = ((PetscObject)ctx)->tag;
5728 
5729   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5730   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5731   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5732   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5733   PetscCall(PetscMalloc1(nreqs,&reqs));
5734   rwaits = reqs;
5735   swaits = reqs + nrecvs;
5736 
5737   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5738   if (scall == MAT_INITIAL_MATRIX) {
5739     /* i-array */
5740     /*---------*/
5741     /*  post receives */
5742     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5743     for (i=0; i<nrecvs; i++) {
5744       rowlen = rvalues + rstarts[i]*rbs;
5745       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5746       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5747     }
5748 
5749     /* pack the outgoing message */
5750     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5751 
5752     sstartsj[0] = 0;
5753     rstartsj[0] = 0;
5754     len         = 0; /* total length of j or a array to be sent */
5755     if (nsends) {
5756       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5757       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5758     }
5759     for (i=0; i<nsends; i++) {
5760       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5761       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5762       for (j=0; j<nrows; j++) {
5763         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5764         for (l=0; l<sbs; l++) {
5765           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5766 
5767           rowlen[j*sbs+l] = ncols;
5768 
5769           len += ncols;
5770           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5771         }
5772         k++;
5773       }
5774       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5775 
5776       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5777     }
5778     /* recvs and sends of i-array are completed */
5779     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5780     PetscCall(PetscFree(svalues));
5781 
5782     /* allocate buffers for sending j and a arrays */
5783     PetscCall(PetscMalloc1(len+1,&bufj));
5784     PetscCall(PetscMalloc1(len+1,&bufa));
5785 
5786     /* create i-array of B_oth */
5787     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5788 
5789     b_othi[0] = 0;
5790     len       = 0; /* total length of j or a array to be received */
5791     k         = 0;
5792     for (i=0; i<nrecvs; i++) {
5793       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5794       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5795       for (j=0; j<nrows; j++) {
5796         b_othi[k+1] = b_othi[k] + rowlen[j];
5797         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5798         k++;
5799       }
5800       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5801     }
5802     PetscCall(PetscFree(rvalues));
5803 
5804     /* allocate space for j and a arrays of B_oth */
5805     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5806     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5807 
5808     /* j-array */
5809     /*---------*/
5810     /*  post receives of j-array */
5811     for (i=0; i<nrecvs; i++) {
5812       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5813       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5814     }
5815 
5816     /* pack the outgoing message j-array */
5817     if (nsends) k = sstarts[0];
5818     for (i=0; i<nsends; i++) {
5819       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5820       bufJ  = bufj+sstartsj[i];
5821       for (j=0; j<nrows; j++) {
5822         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5823         for (ll=0; ll<sbs; ll++) {
5824           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5825           for (l=0; l<ncols; l++) {
5826             *bufJ++ = cols[l];
5827           }
5828           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5829         }
5830       }
5831       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5832     }
5833 
5834     /* recvs and sends of j-array are completed */
5835     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5836   } else if (scall == MAT_REUSE_MATRIX) {
5837     sstartsj = *startsj_s;
5838     rstartsj = *startsj_r;
5839     bufa     = *bufa_ptr;
5840     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5841     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5842   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5843 
5844   /* a-array */
5845   /*---------*/
5846   /*  post receives of a-array */
5847   for (i=0; i<nrecvs; i++) {
5848     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5849     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5850   }
5851 
5852   /* pack the outgoing message a-array */
5853   if (nsends) k = sstarts[0];
5854   for (i=0; i<nsends; i++) {
5855     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5856     bufA  = bufa+sstartsj[i];
5857     for (j=0; j<nrows; j++) {
5858       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5859       for (ll=0; ll<sbs; ll++) {
5860         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5861         for (l=0; l<ncols; l++) {
5862           *bufA++ = vals[l];
5863         }
5864         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5865       }
5866     }
5867     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5868   }
5869   /* recvs and sends of a-array are completed */
5870   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5871   PetscCall(PetscFree(reqs));
5872 
5873   if (scall == MAT_INITIAL_MATRIX) {
5874     /* put together the new matrix */
5875     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5876 
5877     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5878     /* Since these are PETSc arrays, change flags to free them as necessary. */
5879     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5880     b_oth->free_a  = PETSC_TRUE;
5881     b_oth->free_ij = PETSC_TRUE;
5882     b_oth->nonew   = 0;
5883 
5884     PetscCall(PetscFree(bufj));
5885     if (!startsj_s || !bufa_ptr) {
5886       PetscCall(PetscFree2(sstartsj,rstartsj));
5887       PetscCall(PetscFree(bufa_ptr));
5888     } else {
5889       *startsj_s = sstartsj;
5890       *startsj_r = rstartsj;
5891       *bufa_ptr  = bufa;
5892     }
5893   } else if (scall == MAT_REUSE_MATRIX) {
5894     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5895   }
5896 
5897   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5898   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5899   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5900   PetscFunctionReturn(0);
5901 }
5902 
5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5906 #if defined(PETSC_HAVE_MKL_SPARSE)
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5908 #endif
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5911 #if defined(PETSC_HAVE_ELEMENTAL)
5912 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5913 #endif
5914 #if defined(PETSC_HAVE_SCALAPACK)
5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5916 #endif
5917 #if defined(PETSC_HAVE_HYPRE)
5918 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5919 #endif
5920 #if defined(PETSC_HAVE_CUDA)
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5922 #endif
5923 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5925 #endif
5926 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5927 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5928 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5929 
5930 /*
5931     Computes (B'*A')' since computing B*A directly is untenable
5932 
5933                n                       p                          p
5934         [             ]       [             ]         [                 ]
5935       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5936         [             ]       [             ]         [                 ]
5937 
5938 */
5939 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5940 {
5941   Mat            At,Bt,Ct;
5942 
5943   PetscFunctionBegin;
5944   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5945   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5946   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5947   PetscCall(MatDestroy(&At));
5948   PetscCall(MatDestroy(&Bt));
5949   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5950   PetscCall(MatDestroy(&Ct));
5951   PetscFunctionReturn(0);
5952 }
5953 
5954 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5955 {
5956   PetscBool      cisdense;
5957 
5958   PetscFunctionBegin;
5959   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5960   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5961   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5962   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5963   if (!cisdense) {
5964     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5965   }
5966   PetscCall(MatSetUp(C));
5967 
5968   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5969   PetscFunctionReturn(0);
5970 }
5971 
5972 /* ----------------------------------------------------------------*/
5973 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5974 {
5975   Mat_Product *product = C->product;
5976   Mat         A = product->A,B=product->B;
5977 
5978   PetscFunctionBegin;
5979   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5980     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5981 
5982   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5983   C->ops->productsymbolic = MatProductSymbolic_AB;
5984   PetscFunctionReturn(0);
5985 }
5986 
5987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5988 {
5989   Mat_Product    *product = C->product;
5990 
5991   PetscFunctionBegin;
5992   if (product->type == MATPRODUCT_AB) {
5993     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5994   }
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5999 
6000   Input Parameters:
6001 
6002     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6003     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6004 
6005     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6006 
6007     For Set1, j1[] contains column indices of the nonzeros.
6008     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6010     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6011 
6012     Similar for Set2.
6013 
6014     This routine merges the two sets of nonzeros row by row and removes repeats.
6015 
6016   Output Parameters: (memory is allocated by the caller)
6017 
6018     i[],j[]: the CSR of the merged matrix, which has m rows.
6019     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6020     imap2[]: similar to imap1[], but for Set2.
6021     Note we order nonzeros row-by-row and from left to right.
6022 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* Row index of mat and its number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2; /* t*: counters over unique nonzeros; b*/e*: begin/end cursors into j1[]/j2[] */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-way merge of the sorted (possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Next merged nonzero comes only from Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Next merged nonzero comes only from Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: cumulative unique nonzeros after finishing row r */
  }
  PetscFunctionReturn(0);
}
6076 
6077 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6078 
6079   Input Parameters:
6080     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6081     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6082       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6083 
6084       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6085       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6086 
6087   Output Parameters:
6088     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6089     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6090       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6091       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6092 
6093     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6094       Atot: number of entries belonging to the diagonal block.
6095       Annz: number of unique nonzeros belonging to the diagonal block.
6096       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6097         repeats (i.e., same 'i,j' pair).
6098       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6099         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6100 
6101       Atot: number of entries belonging to the diagonal block
6102       Annz: number of unique nonzeros belonging to the diagonal block.
6103 
6104     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6105 
6106     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6107 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid; /* Cursors: k/s bracket one row, mid separates diag from offdiag, p/q scan within a row */
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart; /* Number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); /* NOTE(review): '<=' admits j[p]==N, but valid global columns are [0,N) -- confirm the bound is intended */
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); /* Sort the row's columns; perm[] follows so values can be scattered later */
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot,&Aperm));
  PetscCall(PetscMalloc1(Btot,&Bperm));
  PetscCall(PetscMalloc1(Annz+1,&Ajmap));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets in this second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* Prefix sum of repeat counts */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6207 
6208 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6209 
6210   Input Parameters:
6211     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6212     nnz:  number of unique nonzeros in the merged matrix
6213     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6214     jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set
6215 
6216   Output Parameter: (memory is allocated by the caller)
6217     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6218 
6219   Example:
6220     nnz1 = 4
6221     nnz  = 6
6222     imap = [1,3,4,5]
6223     jmap = [0,3,5,6,7]
6224    then,
6225     jmap_new = [0,0,3,3,5,6,7]
6226 */
6227 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6228 {
6229   PetscCount k,p;
6230 
6231   PetscFunctionBegin;
6232   jmap_new[0] = 0;
6233   p = nnz; /* p loops over jmap_new[] backwards */
6234   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6235     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6236   }
6237   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6238   PetscFunctionReturn(0);
6239 }
6240 
6241 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6242 {
6243   MPI_Comm                  comm;
6244   PetscMPIInt               rank,size;
6245   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6246   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6247   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6248 
6249   PetscFunctionBegin;
6250   PetscCall(PetscFree(mpiaij->garray));
6251   PetscCall(VecDestroy(&mpiaij->lvec));
6252 #if defined(PETSC_USE_CTABLE)
6253   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6254 #else
6255   PetscCall(PetscFree(mpiaij->colmap));
6256 #endif
6257   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6258   mat->assembled = PETSC_FALSE;
6259   mat->was_assembled = PETSC_FALSE;
6260   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6261 
6262   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6263   PetscCallMPI(MPI_Comm_size(comm,&size));
6264   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6265   PetscCall(PetscLayoutSetUp(mat->rmap));
6266   PetscCall(PetscLayoutSetUp(mat->cmap));
6267   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6268   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6269   PetscCall(MatGetLocalSize(mat,&m,&n));
6270   PetscCall(MatGetSize(mat,&M,&N));
6271 
6272   /* ---------------------------------------------------------------------------*/
6273   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6274   /* entries come first, then local rows, then remote rows.                     */
6275   /* ---------------------------------------------------------------------------*/
6276   PetscCount n1 = coo_n,*perm1;
6277   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6278   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6279   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6280   PetscCall(PetscArraycpy(j1,coo_j,n1));
6281   for (k=0; k<n1; k++) perm1[k] = k;
6282 
6283   /* Manipulate indices so that entries with negative row or col indices will have smallest
6284      row indices, local entries will have greater but negative row indices, and remote entries
6285      will have positive row indices.
6286   */
6287   for (k=0; k<n1; k++) {
6288     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6289     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6290     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6291     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6292   }
6293 
6294   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6295   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6296   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6297   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6298   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6299 
6300   /* ---------------------------------------------------------------------------*/
6301   /*           Split local rows into diag/offdiag portions                      */
6302   /* ---------------------------------------------------------------------------*/
6303   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6304   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6305   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6306 
6307   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6308   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6309   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6310 
6311   /* ---------------------------------------------------------------------------*/
6312   /*           Send remote rows to their owner                                  */
6313   /* ---------------------------------------------------------------------------*/
6314   /* Find which rows should be sent to which remote ranks*/
6315   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6316   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6317   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6318   const PetscInt *ranges;
6319   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6320 
6321   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6322   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6323   for (k=rem; k<n1;) {
6324     PetscMPIInt  owner;
6325     PetscInt     firstRow,lastRow;
6326 
6327     /* Locate a row range */
6328     firstRow = i1[k]; /* first row of this owner */
6329     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6330     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6331 
6332     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6333     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6334 
6335     /* All entries in [k,p) belong to this remote owner */
6336     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6337       PetscMPIInt *sendto2;
6338       PetscInt    *nentries2;
6339       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6340 
6341       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6342       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6343       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6344       PetscCall(PetscFree2(sendto,nentries2));
6345       sendto      = sendto2;
6346       nentries    = nentries2;
6347       maxNsend    = maxNsend2;
6348     }
6349     sendto[nsend]   = owner;
6350     nentries[nsend] = p - k;
6351     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6352     nsend++;
6353     k = p;
6354   }
6355 
6356   /* Build 1st SF to know offsets on remote to send data */
6357   PetscSF     sf1;
6358   PetscInt    nroots = 1,nroots2 = 0;
6359   PetscInt    nleaves = nsend,nleaves2 = 0;
6360   PetscInt    *offsets;
6361   PetscSFNode *iremote;
6362 
6363   PetscCall(PetscSFCreate(comm,&sf1));
6364   PetscCall(PetscMalloc1(nsend,&iremote));
6365   PetscCall(PetscMalloc1(nsend,&offsets));
6366   for (k=0; k<nsend; k++) {
6367     iremote[k].rank  = sendto[k];
6368     iremote[k].index = 0;
6369     nleaves2        += nentries[k];
6370     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6371   }
6372   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6373   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6374   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6375   PetscCall(PetscSFDestroy(&sf1));
6376   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6377 
6378   /* Build 2nd SF to send remote COOs to their owner */
6379   PetscSF sf2;
6380   nroots  = nroots2;
6381   nleaves = nleaves2;
6382   PetscCall(PetscSFCreate(comm,&sf2));
6383   PetscCall(PetscSFSetFromOptions(sf2));
6384   PetscCall(PetscMalloc1(nleaves,&iremote));
6385   p       = 0;
6386   for (k=0; k<nsend; k++) {
6387     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6388     for (q=0; q<nentries[k]; q++,p++) {
6389       iremote[p].rank  = sendto[k];
6390       iremote[p].index = offsets[k] + q;
6391     }
6392   }
6393   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6394 
6395   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6396   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6397 
6398   /* Send the remote COOs to their owner */
6399   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6400   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6401   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6402   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6403   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6404   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6405   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6406 
6407   PetscCall(PetscFree(offsets));
6408   PetscCall(PetscFree2(sendto,nentries));
6409 
6410   /* ---------------------------------------------------------------*/
6411   /* Sort received COOs by row along with the permutation array     */
6412   /* ---------------------------------------------------------------*/
6413   for (k=0; k<n2; k++) perm2[k] = k;
6414   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6415 
6416   /* ---------------------------------------------------------------*/
6417   /* Split received COOs into diag/offdiag portions                 */
6418   /* ---------------------------------------------------------------*/
6419   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6420   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6421   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6422 
6423   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6424   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6425 
6426   /* --------------------------------------------------------------------------*/
6427   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6428   /* --------------------------------------------------------------------------*/
6429   PetscInt   *Ai,*Bi;
6430   PetscInt   *Aj,*Bj;
6431 
6432   PetscCall(PetscMalloc1(m+1,&Ai));
6433   PetscCall(PetscMalloc1(m+1,&Bi));
6434   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6435   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6436 
6437   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6438   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6439   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6440   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6441   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6442 
6443   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6444   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6445 
6446   /* --------------------------------------------------------------------------*/
6447   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6448   /* expect nonzeros in A/B most likely have local contributing entries        */
6449   /* --------------------------------------------------------------------------*/
6450   PetscInt Annz = Ai[m];
6451   PetscInt Bnnz = Bi[m];
6452   PetscCount *Ajmap1_new,*Bjmap1_new;
6453 
6454   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6455   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6456 
6457   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6458   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6459 
6460   PetscCall(PetscFree(Aimap1));
6461   PetscCall(PetscFree(Ajmap1));
6462   PetscCall(PetscFree(Bimap1));
6463   PetscCall(PetscFree(Bjmap1));
6464   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6465   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6466   PetscCall(PetscFree3(i1,j1,perm1));
6467   PetscCall(PetscFree3(i2,j2,perm2));
6468 
6469   Ajmap1 = Ajmap1_new;
6470   Bjmap1 = Bjmap1_new;
6471 
6472   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6473   if (Annz < Annz1 + Annz2) {
6474     PetscInt *Aj_new;
6475     PetscCall(PetscMalloc1(Annz,&Aj_new));
6476     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6477     PetscCall(PetscFree(Aj));
6478     Aj   = Aj_new;
6479   }
6480 
6481   if (Bnnz < Bnnz1 + Bnnz2) {
6482     PetscInt *Bj_new;
6483     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6484     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6485     PetscCall(PetscFree(Bj));
6486     Bj   = Bj_new;
6487   }
6488 
6489   /* --------------------------------------------------------------------------------*/
6490   /* Create new submatrices for on-process and off-process coupling                  */
6491   /* --------------------------------------------------------------------------------*/
6492   PetscScalar   *Aa,*Ba;
6493   MatType       rtype;
6494   Mat_SeqAIJ    *a,*b;
6495   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6496   PetscCall(PetscCalloc1(Bnnz,&Ba));
6497   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6498   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6499   PetscCall(MatDestroy(&mpiaij->A));
6500   PetscCall(MatDestroy(&mpiaij->B));
6501   PetscCall(MatGetRootType_Private(mat,&rtype));
6502   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6503   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6504   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6505 
6506   a = (Mat_SeqAIJ*)mpiaij->A->data;
6507   b = (Mat_SeqAIJ*)mpiaij->B->data;
6508   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6509   a->free_a       = b->free_a       = PETSC_TRUE;
6510   a->free_ij      = b->free_ij      = PETSC_TRUE;
6511 
6512   /* conversion must happen AFTER multiply setup */
6513   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6514   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6515   PetscCall(VecDestroy(&mpiaij->lvec));
6516   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6517   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6518 
6519   mpiaij->coo_n   = coo_n;
6520   mpiaij->coo_sf  = sf2;
6521   mpiaij->sendlen = nleaves;
6522   mpiaij->recvlen = nroots;
6523 
6524   mpiaij->Annz    = Annz;
6525   mpiaij->Bnnz    = Bnnz;
6526 
6527   mpiaij->Annz2   = Annz2;
6528   mpiaij->Bnnz2   = Bnnz2;
6529 
6530   mpiaij->Atot1   = Atot1;
6531   mpiaij->Atot2   = Atot2;
6532   mpiaij->Btot1   = Btot1;
6533   mpiaij->Btot2   = Btot2;
6534 
6535   mpiaij->Ajmap1  = Ajmap1;
6536   mpiaij->Aperm1  = Aperm1;
6537 
6538   mpiaij->Bjmap1  = Bjmap1;
6539   mpiaij->Bperm1  = Bperm1;
6540 
6541   mpiaij->Aimap2  = Aimap2;
6542   mpiaij->Ajmap2  = Ajmap2;
6543   mpiaij->Aperm2  = Aperm2;
6544 
6545   mpiaij->Bimap2  = Bimap2;
6546   mpiaij->Bjmap2  = Bjmap2;
6547   mpiaij->Bperm2  = Bperm2;
6548 
6549   mpiaij->Cperm1  = Cperm1;
6550 
6551   /* Allocate in preallocation. If not used, it has zero cost on host */
6552   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6553   PetscFunctionReturn(0);
6554 }
6555 
/* Insert/add the COO values v[] (ordered as in the i/j arrays given to MatSetPreallocationCOO())
   into the diagonal block A and off-diagonal block B of the MPIAIJ matrix. Entries owned by other
   processes are forwarded through the SF built at preallocation time; the communication is
   overlapped with the summation of locally-owned entries. With INSERT_VALUES existing matrix
   values are overwritten, otherwise (ADD_VALUES) the sums are accumulated onto them. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B;
  PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  /* jmap1/perm1 describe local COO entries, jmap2/imap2/perm2 describe received remote entries;
     all were precomputed in MatSetPreallocationCOO_MPIAIJ() */
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; /* [Ajmap1[i],Ajmap1[i+1]) are the local COO entries landing on nonzero i */
    Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i=0; i<Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] are the target nonzeros,
     which were already initialized by the local pass above */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6602 
6603 /* ----------------------------------------------------------------*/
6604 
6605 /*MC
6606    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6607 
6608    Options Database Keys:
6609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6610 
6611    Level: beginner
6612 
   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.
6617 
    MatSetOption(mat,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6620 
6621 .seealso: `MatCreateAIJ()`
6622 M*/
6623 
/* Constructor registered for MATMPIAIJ: allocates the Mat_MPIAIJ context, installs the
   operations table, creates the off-process value stash, and composes the type-specific
   methods (preallocation, conversions, products, COO assembly) on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily when needed */
  b->garray      = NULL; /* global indices of the off-diagonal columns; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* type-specific methods dispatched by name via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to other matrix types; device/back-end ones are guarded by configure-time flags */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6703 
6704 /*@C
6705      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6706          and "off-diagonal" part of the matrix in CSR format.
6707 
6708    Collective
6709 
6710    Input Parameters:
6711 +  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6718 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6719 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6720 .   a - matrix values
6721 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6722 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6723 -   oa - matrix values
6724 
6725    Output Parameter:
6726 .   mat - the matrix
6727 
6728    Level: advanced
6729 
6730    Notes:
6731        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6732        must free the arrays once the matrix has been destroyed and not before.
6733 
6734        The i and j indices are 0 based
6735 
6736        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6737 
6738        This sets local rows and cannot be used to set off-processor values.
6739 
6740        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6741        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6742        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6743        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6744        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6745        communication if it is known that only local entries will be set.
6746 
6747 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6748           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6749 @*/
6750 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6751 {
6752   Mat_MPIAIJ     *maij;
6753 
6754   PetscFunctionBegin;
6755   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6756   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6757   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6758   PetscCall(MatCreate(comm,mat));
6759   PetscCall(MatSetSizes(*mat,m,n,M,N));
6760   PetscCall(MatSetType(*mat,MATMPIAIJ));
6761   maij = (Mat_MPIAIJ*) (*mat)->data;
6762 
6763   (*mat)->preallocated = PETSC_TRUE;
6764 
6765   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6766   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6767 
6768   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6769   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6770 
6771   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6772   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6773   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6774   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6775   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6776   PetscFunctionReturn(0);
6777 }
6778 
/* Product data attached to a MATPRODUCT (AB, AtB or PtAP) computed by the MPIAIJ "backend"
   algorithm: the parallel product is decomposed into a series of sequential intermediate
   products mp[], whose entries are then scattered into the result via COO value insertion. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the SF-allocated coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local part of B instead of by P_diag and P_off separately */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6809 
6810 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6811 {
6812   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6813   PetscInt            i;
6814 
6815   PetscFunctionBegin;
6816   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6817   PetscCall(PetscFree(mmdata->bufa));
6818   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6819   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6820   PetscCall(MatDestroy(&mmdata->P_oth));
6821   PetscCall(MatDestroy(&mmdata->Bloc));
6822   PetscCall(PetscSFDestroy(&mmdata->sf));
6823   for (i = 0; i < mmdata->cp; i++) {
6824     PetscCall(MatDestroy(&mmdata->mp[i]));
6825   }
6826   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6827   PetscCall(PetscFree(mmdata->own[0]));
6828   PetscCall(PetscFree(mmdata->own));
6829   PetscCall(PetscFree(mmdata->off[0]));
6830   PetscCall(PetscFree(mmdata->off));
6831   PetscCall(PetscFree(mmdata));
6832   PetscFunctionReturn(0);
6833 }
6834 
6835 /* Copy selected n entries with indices in idx[] of A to v[].
6836    If idx is NULL, copy the whole data array of A to v[]
6837  */
6838 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6839 {
6840   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6841 
6842   PetscFunctionBegin;
6843   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6844   if (f) {
6845     PetscCall((*f)(A,n,idx,v));
6846   } else {
6847     const PetscScalar *vv;
6848 
6849     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6850     if (n && idx) {
6851       PetscScalar    *w = v;
6852       const PetscInt *oi = idx;
6853       PetscInt       j;
6854 
6855       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6856     } else {
6857       PetscCall(PetscArraycpy(v,vv,n));
6858     }
6859     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6860   }
6861   PetscFunctionReturn(0);
6862 }
6863 
/* Numeric phase of the backend matrix product: refresh the temporary matrices (unless the
   symbolic phase just did), run the numeric phase of each intermediate product, gather their
   entries into the COO value buffer (scattering off-process contributions through the SF),
   and insert everything into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* temporaries are reusable only for the first numeric call after symbolic */

  /* numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* copy each (non-temporary) intermediate product's values into the COO buffers, in the
     same order used when the COO index arrays were built during the symbolic phase */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values are appended after the on-process ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6912 
6913 /* Support for Pt * A, A * P, or Pt * A * P */
6914 #define MAX_NUMBER_INTERMEDIATE 4
6915 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6916 {
6917   Mat_Product            *product = C->product;
6918   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6919   Mat_MPIAIJ             *a,*p;
6920   MatMatMPIAIJBACKEND    *mmdata;
6921   ISLocalToGlobalMapping P_oth_l2g = NULL;
6922   IS                     glob = NULL;
6923   const char             *prefix;
6924   char                   pprefix[256];
6925   const PetscInt         *globidx,*P_oth_idx;
6926   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6927   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6928   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6929                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6930                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6931   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6932 
6933   MatProductType         ptype;
6934   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6935   PetscMPIInt            size;
6936 
6937   PetscFunctionBegin;
6938   MatCheckProduct(C,1);
6939   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6940   ptype = product->type;
6941   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6942     ptype = MATPRODUCT_AB;
6943     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6944   }
6945   switch (ptype) {
6946   case MATPRODUCT_AB:
6947     A = product->A;
6948     P = product->B;
6949     m = A->rmap->n;
6950     n = P->cmap->n;
6951     M = A->rmap->N;
6952     N = P->cmap->N;
6953     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6954     break;
6955   case MATPRODUCT_AtB:
6956     P = product->A;
6957     A = product->B;
6958     m = P->cmap->n;
6959     n = A->cmap->n;
6960     M = P->cmap->N;
6961     N = A->cmap->N;
6962     hasoffproc = PETSC_TRUE;
6963     break;
6964   case MATPRODUCT_PtAP:
6965     A = product->A;
6966     P = product->B;
6967     m = P->cmap->n;
6968     n = P->cmap->n;
6969     M = P->cmap->N;
6970     N = P->cmap->N;
6971     hasoffproc = PETSC_TRUE;
6972     break;
6973   default:
6974     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6975   }
6976   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6977   if (size == 1) hasoffproc = PETSC_FALSE;
6978 
6979   /* defaults */
6980   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6981     mp[i]    = NULL;
6982     mptmp[i] = PETSC_FALSE;
6983     rmapt[i] = -1;
6984     cmapt[i] = -1;
6985     rmapa[i] = NULL;
6986     cmapa[i] = NULL;
6987   }
6988 
6989   /* customization */
6990   PetscCall(PetscNew(&mmdata));
6991   mmdata->reusesym = product->api_user;
6992   if (ptype == MATPRODUCT_AB) {
6993     if (product->api_user) {
6994       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6995       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6996       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6997       PetscOptionsEnd();
6998     } else {
6999       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7000       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7001       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7002       PetscOptionsEnd();
7003     }
7004   } else if (ptype == MATPRODUCT_PtAP) {
7005     if (product->api_user) {
7006       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7007       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7008       PetscOptionsEnd();
7009     } else {
7010       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7011       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7012       PetscOptionsEnd();
7013     }
7014   }
7015   a = (Mat_MPIAIJ*)A->data;
7016   p = (Mat_MPIAIJ*)P->data;
7017   PetscCall(MatSetSizes(C,m,n,M,N));
7018   PetscCall(PetscLayoutSetUp(C->rmap));
7019   PetscCall(PetscLayoutSetUp(C->cmap));
7020   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7021   PetscCall(MatGetOptionsPrefix(C,&prefix));
7022 
7023   cp   = 0;
7024   switch (ptype) {
7025   case MATPRODUCT_AB: /* A * P */
7026     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7027 
7028     /* A_diag * P_local (merged or not) */
7029     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7030       /* P is product->B */
7031       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7032       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7033       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7034       PetscCall(MatProductSetFill(mp[cp],product->fill));
7035       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7036       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7037       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7038       mp[cp]->product->api_user = product->api_user;
7039       PetscCall(MatProductSetFromOptions(mp[cp]));
7040       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7041       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7042       PetscCall(ISGetIndices(glob,&globidx));
7043       rmapt[cp] = 1;
7044       cmapt[cp] = 2;
7045       cmapa[cp] = globidx;
7046       mptmp[cp] = PETSC_FALSE;
7047       cp++;
7048     } else { /* A_diag * P_diag and A_diag * P_off */
7049       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7050       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7051       PetscCall(MatProductSetFill(mp[cp],product->fill));
7052       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7053       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7054       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7055       mp[cp]->product->api_user = product->api_user;
7056       PetscCall(MatProductSetFromOptions(mp[cp]));
7057       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7058       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7059       rmapt[cp] = 1;
7060       cmapt[cp] = 1;
7061       mptmp[cp] = PETSC_FALSE;
7062       cp++;
7063       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7064       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7065       PetscCall(MatProductSetFill(mp[cp],product->fill));
7066       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7067       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7068       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7069       mp[cp]->product->api_user = product->api_user;
7070       PetscCall(MatProductSetFromOptions(mp[cp]));
7071       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7072       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7073       rmapt[cp] = 1;
7074       cmapt[cp] = 2;
7075       cmapa[cp] = p->garray;
7076       mptmp[cp] = PETSC_FALSE;
7077       cp++;
7078     }
7079 
7080     /* A_off * P_other */
7081     if (mmdata->P_oth) {
7082       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7083       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7084       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7085       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7086       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7087       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7088       PetscCall(MatProductSetFill(mp[cp],product->fill));
7089       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7090       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7091       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7092       mp[cp]->product->api_user = product->api_user;
7093       PetscCall(MatProductSetFromOptions(mp[cp]));
7094       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7095       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7096       rmapt[cp] = 1;
7097       cmapt[cp] = 2;
7098       cmapa[cp] = P_oth_idx;
7099       mptmp[cp] = PETSC_FALSE;
7100       cp++;
7101     }
7102     break;
7103 
7104   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7105     /* A is product->B */
7106     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7107     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7108       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7109       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7110       PetscCall(MatProductSetFill(mp[cp],product->fill));
7111       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7112       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7113       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7114       mp[cp]->product->api_user = product->api_user;
7115       PetscCall(MatProductSetFromOptions(mp[cp]));
7116       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7117       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7118       PetscCall(ISGetIndices(glob,&globidx));
7119       rmapt[cp] = 2;
7120       rmapa[cp] = globidx;
7121       cmapt[cp] = 2;
7122       cmapa[cp] = globidx;
7123       mptmp[cp] = PETSC_FALSE;
7124       cp++;
7125     } else {
7126       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7127       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7128       PetscCall(MatProductSetFill(mp[cp],product->fill));
7129       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7130       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7131       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7132       mp[cp]->product->api_user = product->api_user;
7133       PetscCall(MatProductSetFromOptions(mp[cp]));
7134       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7135       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7136       PetscCall(ISGetIndices(glob,&globidx));
7137       rmapt[cp] = 1;
7138       cmapt[cp] = 2;
7139       cmapa[cp] = globidx;
7140       mptmp[cp] = PETSC_FALSE;
7141       cp++;
7142       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7143       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7144       PetscCall(MatProductSetFill(mp[cp],product->fill));
7145       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7146       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7147       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7148       mp[cp]->product->api_user = product->api_user;
7149       PetscCall(MatProductSetFromOptions(mp[cp]));
7150       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7151       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7152       rmapt[cp] = 2;
7153       rmapa[cp] = p->garray;
7154       cmapt[cp] = 2;
7155       cmapa[cp] = globidx;
7156       mptmp[cp] = PETSC_FALSE;
7157       cp++;
7158     }
7159     break;
7160   case MATPRODUCT_PtAP:
7161     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7162     /* P is product->B */
7163     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7164     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7165     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7166     PetscCall(MatProductSetFill(mp[cp],product->fill));
7167     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7168     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7169     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7170     mp[cp]->product->api_user = product->api_user;
7171     PetscCall(MatProductSetFromOptions(mp[cp]));
7172     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7173     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7174     PetscCall(ISGetIndices(glob,&globidx));
7175     rmapt[cp] = 2;
7176     rmapa[cp] = globidx;
7177     cmapt[cp] = 2;
7178     cmapa[cp] = globidx;
7179     mptmp[cp] = PETSC_FALSE;
7180     cp++;
7181     if (mmdata->P_oth) {
7182       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7183       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7184       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7185       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7186       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7187       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7188       PetscCall(MatProductSetFill(mp[cp],product->fill));
7189       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7190       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7191       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7192       mp[cp]->product->api_user = product->api_user;
7193       PetscCall(MatProductSetFromOptions(mp[cp]));
7194       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7195       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7196       mptmp[cp] = PETSC_TRUE;
7197       cp++;
7198       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7199       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7200       PetscCall(MatProductSetFill(mp[cp],product->fill));
7201       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7202       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7203       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7204       mp[cp]->product->api_user = product->api_user;
7205       PetscCall(MatProductSetFromOptions(mp[cp]));
7206       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7207       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7208       rmapt[cp] = 2;
7209       rmapa[cp] = globidx;
7210       cmapt[cp] = 2;
7211       cmapa[cp] = P_oth_idx;
7212       mptmp[cp] = PETSC_FALSE;
7213       cp++;
7214     }
7215     break;
7216   default:
7217     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7218   }
7219   /* sanity check */
7220   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7221 
7222   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7223   for (i = 0; i < cp; i++) {
7224     mmdata->mp[i]    = mp[i];
7225     mmdata->mptmp[i] = mptmp[i];
7226   }
7227   mmdata->cp = cp;
7228   C->product->data       = mmdata;
7229   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7230   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7231 
7232   /* memory type */
7233   mmdata->mtype = PETSC_MEMTYPE_HOST;
7234   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7235   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7236   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7237   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7238 
7239   /* prepare coo coordinates for values insertion */
7240 
7241   /* count total nonzeros of those intermediate seqaij Mats
7242     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7243     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7244     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7245   */
7246   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7247     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7248     if (mptmp[cp]) continue;
7249     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7250       const PetscInt *rmap = rmapa[cp];
7251       const PetscInt mr = mp[cp]->rmap->n;
7252       const PetscInt rs = C->rmap->rstart;
7253       const PetscInt re = C->rmap->rend;
7254       const PetscInt *ii  = mm->i;
7255       for (i = 0; i < mr; i++) {
7256         const PetscInt gr = rmap[i];
7257         const PetscInt nz = ii[i+1] - ii[i];
7258         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7259         else ncoo_oown += nz; /* this row is local */
7260       }
7261     } else ncoo_d += mm->nz;
7262   }
7263 
7264   /*
7265     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7266 
7267     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7268 
7269     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7270 
7271     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7272     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7273     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7274 
7275     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7276     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7277   */
7278   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7279   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7280 
7281   /* gather (i,j) of nonzeros inserted by remote procs */
7282   if (hasoffproc) {
7283     PetscSF  msf;
7284     PetscInt ncoo2,*coo_i2,*coo_j2;
7285 
7286     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7287     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7288     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7289 
7290     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7291       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7292       PetscInt   *idxoff = mmdata->off[cp];
7293       PetscInt   *idxown = mmdata->own[cp];
7294       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7295         const PetscInt *rmap = rmapa[cp];
7296         const PetscInt *cmap = cmapa[cp];
7297         const PetscInt *ii  = mm->i;
7298         PetscInt       *coi = coo_i + ncoo_o;
7299         PetscInt       *coj = coo_j + ncoo_o;
7300         const PetscInt mr = mp[cp]->rmap->n;
7301         const PetscInt rs = C->rmap->rstart;
7302         const PetscInt re = C->rmap->rend;
7303         const PetscInt cs = C->cmap->rstart;
7304         for (i = 0; i < mr; i++) {
7305           const PetscInt *jj = mm->j + ii[i];
7306           const PetscInt gr  = rmap[i];
7307           const PetscInt nz  = ii[i+1] - ii[i];
7308           if (gr < rs || gr >= re) { /* this is an offproc row */
7309             for (j = ii[i]; j < ii[i+1]; j++) {
7310               *coi++ = gr;
7311               *idxoff++ = j;
7312             }
7313             if (!cmapt[cp]) { /* already global */
7314               for (j = 0; j < nz; j++) *coj++ = jj[j];
7315             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7316               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7317             } else { /* offdiag */
7318               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7319             }
7320             ncoo_o += nz;
7321           } else { /* this is a local row */
7322             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7323           }
7324         }
7325       }
7326       mmdata->off[cp + 1] = idxoff;
7327       mmdata->own[cp + 1] = idxown;
7328     }
7329 
7330     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7331     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7332     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7333     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7334     ncoo = ncoo_d + ncoo_oown + ncoo2;
7335     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7336     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7337     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7338     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7339     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7340     PetscCall(PetscFree2(coo_i,coo_j));
7341     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7342     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7343     coo_i = coo_i2;
7344     coo_j = coo_j2;
7345   } else { /* no offproc values insertion */
7346     ncoo = ncoo_d;
7347     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7348 
7349     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7350     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7351     PetscCall(PetscSFSetUp(mmdata->sf));
7352   }
7353   mmdata->hasoffproc = hasoffproc;
7354 
7355   /* gather (i,j) of nonzeros inserted locally */
7356   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7357     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7358     PetscInt       *coi = coo_i + ncoo_d;
7359     PetscInt       *coj = coo_j + ncoo_d;
7360     const PetscInt *jj  = mm->j;
7361     const PetscInt *ii  = mm->i;
7362     const PetscInt *cmap = cmapa[cp];
7363     const PetscInt *rmap = rmapa[cp];
7364     const PetscInt mr = mp[cp]->rmap->n;
7365     const PetscInt rs = C->rmap->rstart;
7366     const PetscInt re = C->rmap->rend;
7367     const PetscInt cs = C->cmap->rstart;
7368 
7369     if (mptmp[cp]) continue;
7370     if (rmapt[cp] == 1) { /* consecutive rows */
7371       /* fill coo_i */
7372       for (i = 0; i < mr; i++) {
7373         const PetscInt gr = i + rs;
7374         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7375       }
7376       /* fill coo_j */
7377       if (!cmapt[cp]) { /* type-0, already global */
7378         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7379       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7380         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7381       } else { /* type-2, local to global for sparse columns */
7382         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7383       }
7384       ncoo_d += mm->nz;
7385     } else if (rmapt[cp] == 2) { /* sparse rows */
7386       for (i = 0; i < mr; i++) {
7387         const PetscInt *jj = mm->j + ii[i];
7388         const PetscInt gr  = rmap[i];
7389         const PetscInt nz  = ii[i+1] - ii[i];
7390         if (gr >= rs && gr < re) { /* local rows */
7391           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7392           if (!cmapt[cp]) { /* type-0, already global */
7393             for (j = 0; j < nz; j++) *coj++ = jj[j];
7394           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7395             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7396           } else { /* type-2, local to global for sparse columns */
7397             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7398           }
7399           ncoo_d += nz;
7400         }
7401       }
7402     }
7403   }
7404   if (glob) {
7405     PetscCall(ISRestoreIndices(glob,&globidx));
7406   }
7407   PetscCall(ISDestroy(&glob));
7408   if (P_oth_l2g) {
7409     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7410   }
7411   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7412   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7413   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7414 
7415   /* preallocate with COO data */
7416   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7417   PetscCall(PetscFree2(coo_i,coo_j));
7418   PetscFunctionReturn(0);
7419 }
7420 
7421 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7422 {
7423   Mat_Product *product = mat->product;
7424 #if defined(PETSC_HAVE_DEVICE)
7425   PetscBool    match   = PETSC_FALSE;
7426   PetscBool    usecpu  = PETSC_FALSE;
7427 #else
7428   PetscBool    match   = PETSC_TRUE;
7429 #endif
7430 
7431   PetscFunctionBegin;
7432   MatCheckProduct(mat,1);
7433 #if defined(PETSC_HAVE_DEVICE)
7434   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7435     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7436   }
7437   if (match) { /* we can always fallback to the CPU if requested */
7438     switch (product->type) {
7439     case MATPRODUCT_AB:
7440       if (product->api_user) {
7441         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7442         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7443         PetscOptionsEnd();
7444       } else {
7445         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7446         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7447         PetscOptionsEnd();
7448       }
7449       break;
7450     case MATPRODUCT_AtB:
7451       if (product->api_user) {
7452         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7453         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7454         PetscOptionsEnd();
7455       } else {
7456         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7457         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7458         PetscOptionsEnd();
7459       }
7460       break;
7461     case MATPRODUCT_PtAP:
7462       if (product->api_user) {
7463         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7464         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7465         PetscOptionsEnd();
7466       } else {
7467         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7468         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7469         PetscOptionsEnd();
7470       }
7471       break;
7472     default:
7473       break;
7474     }
7475     match = (PetscBool)!usecpu;
7476   }
7477 #endif
7478   if (match) {
7479     switch (product->type) {
7480     case MATPRODUCT_AB:
7481     case MATPRODUCT_AtB:
7482     case MATPRODUCT_PtAP:
7483       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7484       break;
7485     default:
7486       break;
7487     }
7488   }
7489   /* fallback to MPIAIJ ops */
7490   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7491   PetscFunctionReturn(0);
7492 }
7493 
7494 /*
7495     Special version for direct calls from Fortran
7496 */
7497 #include <petsc/private/fortranimpl.h>
7498 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: both redefinitions below assume a "PetscErrorCode *_ierr" variable is in scope at
   the expansion site (the Fortran interface output argument) and return void on error, so
   they are usable only inside the Fortran stub(s) that follow; they are #undef'd again at
   the bottom of this file. */
#undef  PetscCall
#define PetscCall(...) do {                                                                    \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                                                  \
    }                                                                                          \
  } while (0)

/* Report the error through *_ierr (PETSC_ERROR_INITIAL: this is where the error originates)
   and return void, mirroring the PetscCall redefinition above */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                                            \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                                                    \
  } while (0)
7515 
7516 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7517 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7518 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7519 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7520 #else
7521 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable version of MatSetValues() for MPIAIJ matrices,
  intended for direct calls from Fortran (bypassing the generic Fortran interface layer).

  All scalar arguments arrive as pointers (Fortran pass-by-reference):
    mmat   - the MPIAIJ matrix
    mm,im  - number of rows and their global row indices (negative rows are skipped)
    mn,in  - number of columns and their global column indices (negative columns are skipped)
    v      - the m*n values; row- or column-major depending on aij->roworiented
    maddv  - INSERT_VALUES or ADD_VALUES
    _ierr  - error-code output, set by the redefined PetscCall()/SETERRQ() macros above

  Locally owned rows are split between the diagonal (aij->A) and off-diagonal (aij->B)
  sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros; values destined
  for rows owned by other processes are stashed for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat          mat  = *mmat;
  PetscInt     m    = *mm, n = *mn;
  InsertMode   addv = *maddv;
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;

  MatCheckPreallocated(mat,1);
  /* INSERT_VALUES and ADD_VALUES cannot be mixed between assembly phases */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro (MatSetValues_SeqAIJ_{A,B}_Private) */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state used by the insertion macros: rp/ap point at the row's column indices
       and values, low/high bracket the binary search, lastcol caches the previous column */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: initialize search state for both the diagonal part (1 = A)
           and the off-diagonal part (2 = B) of this row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* v is row-major when roworiented, column-major otherwise */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          /* optionally drop numerically zero off-diagonal entries when adding */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block: insert into A with local column index */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* off-diagonal block: after first assembly, B uses compressed local column
               indices, so translate the global column through the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not present in B's current sparsity and new nonzeros are allowed:
                 disassemble (B reverts to global column indexing) and retry with the
                 global column index */
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                /* NOTE(review): ba is refreshed from b->a directly (not via MatSeqAIJGetArray);
                   presumably the host array is valid right after disassembly - confirm */
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for assembly-time communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
7635 /* Undefining these here since they were redefined from their original definition above! No
7636  * other PETSc functions should be defined past this point, as it is impossible to recover the
7637  * original definitions */
7638 #undef PetscCall
7639 #undef SETERRQ
7640