xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dfd676b1a855b7f967ece75a22ee7f6626d10f89)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50 
51   PetscFunctionBegin;
52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53   A->boundtocpu = flg;
54 #endif
55   if (a->A) {
56     PetscCall(MatBindToCPU(a->A,flg));
57   }
58   if (a->B) {
59     PetscCall(MatBindToCPU(a->B,flg));
60   }
61 
62   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
63    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
64    * to differ from the parent matrix. */
65   if (a->lvec) {
66     PetscCall(VecBindToCPU(a->lvec,flg));
67   }
68   if (a->diag) {
69     PetscCall(VecBindToCPU(a->diag,flg));
70   }
71 
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
76 {
77   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
78 
79   PetscFunctionBegin;
80   if (mat->A) {
81     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
82     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
88 {
89   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
90   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
91   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
92   const PetscInt  *ia,*ib;
93   const MatScalar *aa,*bb,*aav,*bav;
94   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
95   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
96 
97   PetscFunctionBegin;
98   *keptrows = NULL;
99 
100   ia   = a->i;
101   ib   = b->i;
102   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
103   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
104   for (i=0; i<m; i++) {
105     na = ia[i+1] - ia[i];
106     nb = ib[i+1] - ib[i];
107     if (!na && !nb) {
108       cnt++;
109       goto ok1;
110     }
111     aa = aav + ia[i];
112     for (j=0; j<na; j++) {
113       if (aa[j] != 0.0) goto ok1;
114     }
115     bb = bav + ib[i];
116     for (j=0; j <nb; j++) {
117       if (bb[j] != 0.0) goto ok1;
118     }
119     cnt++;
120 ok1:;
121   }
122   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
123   if (!n0rows) {
124     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
125     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
126     PetscFunctionReturn(0);
127   }
128   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
129   cnt  = 0;
130   for (i=0; i<m; i++) {
131     na = ia[i+1] - ia[i];
132     nb = ib[i+1] - ib[i];
133     if (!na && !nb) continue;
134     aa = aav + ia[i];
135     for (j=0; j<na;j++) {
136       if (aa[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141     bb = bav + ib[i];
142     for (j=0; j<nb; j++) {
143       if (bb[j] != 0.0) {
144         rows[cnt++] = rstart + i;
145         goto ok2;
146       }
147     }
148 ok2:;
149   }
150   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
151   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
152   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
153   PetscFunctionReturn(0);
154 }
155 
156 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
157 {
158   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
159   PetscBool         cong;
160 
161   PetscFunctionBegin;
162   PetscCall(MatHasCongruentLayouts(Y,&cong));
163   if (Y->assembled && cong) {
164     PetscCall(MatDiagonalSet(aij->A,D,is));
165   } else {
166     PetscCall(MatDiagonalSet_Default(Y,D,is));
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172 {
173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174   PetscInt       i,rstart,nrows,*rows;
175 
176   PetscFunctionBegin;
177   *zrows = NULL;
178   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
179   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
180   for (i=0; i<nrows; i++) rows[i] += rstart;
181   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
186 {
187   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
188   PetscInt          i,m,n,*garray = aij->garray;
189   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
190   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
191   PetscReal         *work;
192   const PetscScalar *dummy;
193 
194   PetscFunctionBegin;
195   PetscCall(MatGetSize(A,&m,&n));
196   PetscCall(PetscCalloc1(n,&work));
197   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
198   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
199   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
200   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
201   if (type == NORM_2) {
202     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
203       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
204     }
205     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
206       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
207     }
208   } else if (type == NORM_1) {
209     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
210       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
211     }
212     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
213       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
214     }
215   } else if (type == NORM_INFINITY) {
216     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
217       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
218     }
219     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
220       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
221     }
222   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
223     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
224       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
225     }
226     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
227       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
228     }
229   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
230     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
231       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
232     }
233     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
234       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
235     }
236   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
237   if (type == NORM_INFINITY) {
238     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
239   } else {
240     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
241   }
242   PetscCall(PetscFree(work));
243   if (type == NORM_2) {
244     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
245   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
246     for (i=0; i<n; i++) reductions[i] /= m;
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252 {
253   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254   IS              sis,gis;
255   const PetscInt  *isis,*igis;
256   PetscInt        n,*iis,nsis,ngis,rstart,i;
257 
258   PetscFunctionBegin;
259   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
260   PetscCall(MatFindNonzeroRows(a->B,&gis));
261   PetscCall(ISGetSize(gis,&ngis));
262   PetscCall(ISGetSize(sis,&nsis));
263   PetscCall(ISGetIndices(sis,&isis));
264   PetscCall(ISGetIndices(gis,&igis));
265 
266   PetscCall(PetscMalloc1(ngis+nsis,&iis));
267   PetscCall(PetscArraycpy(iis,igis,ngis));
268   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
269   n    = ngis + nsis;
270   PetscCall(PetscSortRemoveDupsInt(&n,iis));
271   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
272   for (i=0; i<n; i++) iis[i] += rstart;
273   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274 
275   PetscCall(ISRestoreIndices(sis,&isis));
276   PetscCall(ISRestoreIndices(gis,&igis));
277   PetscCall(ISDestroy(&sis));
278   PetscCall(ISDestroy(&gis));
279   PetscFunctionReturn(0);
280 }
281 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array) but it is fast to access.
*/
289 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
290 {
291   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
292   PetscInt       n = aij->B->cmap->n,i;
293 
294   PetscFunctionBegin;
295   PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
296 #if defined(PETSC_USE_CTABLE)
297   PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
298   for (i=0; i<n; i++) {
299     PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
300   }
301 #else
302   PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
303   PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
304   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
305 #endif
306   PetscFunctionReturn(0);
307 }
308 
309 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
310 { \
311     if (col <= lastcol1)  low1 = 0;     \
312     else                 high1 = nrow1; \
313     lastcol1 = col;\
314     while (high1-low1 > 5) { \
315       t = (low1+high1)/2; \
316       if (rp1[t] > col) high1 = t; \
317       else              low1  = t; \
318     } \
319       for (_i=low1; _i<high1; _i++) { \
320         if (rp1[_i] > col) break; \
321         if (rp1[_i] == col) { \
322           if (addv == ADD_VALUES) { \
323             ap1[_i] += value;   \
324             /* Not sure LogFlops will slow dow the code or not */ \
325             (void)PetscLogFlops(1.0);   \
326            } \
327           else                    ap1[_i] = value; \
328           goto a_noinsert; \
329         } \
330       }  \
331       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
332       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
333       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
334       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
335       N = nrow1++ - 1; a->nz++; high1++; \
336       /* shift up all the later entries in this row */ \
337       PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
338       PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
339       rp1[_i] = col;  \
340       ap1[_i] = value;  \
341       A->nonzerostate++;\
342       a_noinsert: ; \
343       ailen[row] = nrow1; \
344 }
345 
346 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
347   { \
348     if (col <= lastcol2) low2 = 0;                        \
349     else high2 = nrow2;                                   \
350     lastcol2 = col;                                       \
351     while (high2-low2 > 5) {                              \
352       t = (low2+high2)/2;                                 \
353       if (rp2[t] > col) high2 = t;                        \
354       else             low2  = t;                         \
355     }                                                     \
356     for (_i=low2; _i<high2; _i++) {                       \
357       if (rp2[_i] > col) break;                           \
358       if (rp2[_i] == col) {                               \
359         if (addv == ADD_VALUES) {                         \
360           ap2[_i] += value;                               \
361           (void)PetscLogFlops(1.0);                       \
362         }                                                 \
363         else                    ap2[_i] = value;          \
364         goto b_noinsert;                                  \
365       }                                                   \
366     }                                                     \
367     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
368     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
369     PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
370     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
371     N = nrow2++ - 1; b->nz++; high2++;                    \
372     /* shift up all the later entries in this row */      \
373     PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
374     PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
375     rp2[_i] = col;                                        \
376     ap2[_i] = value;                                      \
377     B->nonzerostate++;                                    \
378     b_noinsert: ;                                         \
379     bilen[row] = nrow2;                                   \
380   }
381 
382 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
383 {
384   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
385   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
386   PetscInt       l,*garray = mat->garray,diag;
387   PetscScalar    *aa,*ba;
388 
389   PetscFunctionBegin;
390   /* code only works for square matrices A */
391 
392   /* find size of row to the left of the diagonal part */
393   PetscCall(MatGetOwnershipRange(A,&diag,NULL));
394   row  = row - diag;
395   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
396     if (garray[b->j[b->i[row]+l]] > diag) break;
397   }
398   if (l) {
399     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
400     PetscCall(PetscArraycpy(ba+b->i[row],v,l));
401     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
402   }
403 
404   /* diagonal part */
405   if (a->i[row+1]-a->i[row]) {
406     PetscCall(MatSeqAIJGetArray(mat->A,&aa));
407     PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
408     PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
409   }
410 
411   /* right of diagonal part */
412   if (b->i[row+1]-b->i[row]-l) {
413     PetscCall(MatSeqAIJGetArray(mat->B,&ba));
414     PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
415     PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
416   }
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
421 {
422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423   PetscScalar    value = 0.0;
424   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426   PetscBool      roworiented = aij->roworiented;
427 
428   /* Some Variables required in the macro */
429   Mat        A                    = aij->A;
430   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
431   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432   PetscBool  ignorezeroentries    = a->ignorezeroentries;
433   Mat        B                    = aij->B;
434   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436   MatScalar  *aa,*ba;
437   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
438   PetscInt   nonew;
439   MatScalar  *ap1,*ap2;
440 
441   PetscFunctionBegin;
442   PetscCall(MatSeqAIJGetArray(A,&aa));
443   PetscCall(MatSeqAIJGetArray(B,&ba));
444   for (i=0; i<m; i++) {
445     if (im[i] < 0) continue;
446     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
447     if (im[i] >= rstart && im[i] < rend) {
448       row      = im[i] - rstart;
449       lastcol1 = -1;
450       rp1      = aj + ai[row];
451       ap1      = aa + ai[row];
452       rmax1    = aimax[row];
453       nrow1    = ailen[row];
454       low1     = 0;
455       high1    = nrow1;
456       lastcol2 = -1;
457       rp2      = bj + bi[row];
458       ap2      = ba + bi[row];
459       rmax2    = bimax[row];
460       nrow2    = bilen[row];
461       low2     = 0;
462       high2    = nrow2;
463 
464       for (j=0; j<n; j++) {
465         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467         if (in[j] >= cstart && in[j] < cend) {
468           col   = in[j] - cstart;
469           nonew = a->nonew;
470           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471         } else if (in[j] < 0) continue;
472         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
477             }
478 #if defined(PETSC_USE_CTABLE)
479             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480             col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
485               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
486               col  =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B        = aij->B;
489               b        = (Mat_SeqAIJ*)B->data;
490               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba       = b->a;
499             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
500               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
501                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
502               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
503             }
504           } else col = in[j];
505           nonew = b->nonew;
506           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
507         }
508       }
509     } else {
510       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515         } else {
516           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
517         }
518       }
519     }
520   }
521   PetscCall(MatSeqAIJRestoreArray(A,&aa));
522   PetscCall(MatSeqAIJRestoreArray(B,&ba));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
528     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
530 */
531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532 {
533   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534   Mat            A           = aij->A; /* diagonal part of the matrix */
535   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539   PetscInt       *ailen      = a->ilen,*aj = a->j;
540   PetscInt       *bilen      = b->ilen,*bj = b->j;
541   PetscInt       am          = aij->A->rmap->n,j;
542   PetscInt       diag_so_far = 0,dnz;
543   PetscInt       offd_so_far = 0,onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j=0; j<am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
574 {
575   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
576   Mat            A      = aij->A; /* diagonal part of the matrix */
577   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
579   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
580   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
581   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
582   PetscInt       *ailen = a->ilen,*aj = a->j;
583   PetscInt       *bilen = b->ilen,*bj = b->j;
584   PetscInt       am     = aij->A->rmap->n,j;
585   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587   PetscScalar    *aa = a->a,*ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j=0; j<am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd = full_offd_i[j];
594     rowstart_diag = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag+dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd+onz_row] = mat_j[col];
604         ba[rowstart_offd+onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
615 {
616   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
617   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
618   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
619 
620   PetscFunctionBegin;
621   for (i=0; i<m; i++) {
622     if (idxm[i] < 0) continue; /* negative row */
623     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
624     if (idxm[i] >= rstart && idxm[i] < rend) {
625       row = idxm[i] - rstart;
626       for (j=0; j<n; j++) {
627         if (idxn[j] < 0) continue; /* negative column */
628         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
629         if (idxn[j] >= cstart && idxn[j] < cend) {
630           col  = idxn[j] - cstart;
631           PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
632         } else {
633           if (!aij->colmap) {
634             PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
635           }
636 #if defined(PETSC_USE_CTABLE)
637           PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
638           col--;
639 #else
640           col = aij->colmap[idxn[j]] - 1;
641 #endif
642           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
643           else {
644             PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
645           }
646         }
647       }
648     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
662   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
663   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664   PetscFunctionReturn(0);
665 }
666 
667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscMPIInt    n;
671   PetscInt       i,j,rstart,ncols,flg;
672   PetscInt       *row,*col;
673   PetscBool      other_disassembled;
674   PetscScalar    *val;
675 
676   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
677 
678   PetscFunctionBegin;
679   if (!aij->donotstash && !mat->nooffprocentries) {
680     while (1) {
681       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
682       if (!flg) break;
683 
684       for (i=0; i<n;) {
685         /* Now identify the consecutive vals belonging to the same row */
686         for (j=i,rstart=row[j]; j<n; j++) {
687           if (row[j] != rstart) break;
688         }
689         if (j < n) ncols = j-i;
690         else       ncols = n-i;
691         /* Now assemble all these values with a single function call */
692         PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
693         i    = j;
694       }
695     }
696     PetscCall(MatStashScatterEnd_Private(&mat->stash));
697   }
698 #if defined(PETSC_HAVE_DEVICE)
699   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
700   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
701   if (mat->boundtocpu) {
702     PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
703     PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
704   }
705 #endif
706   PetscCall(MatAssemblyBegin(aij->A,mode));
707   PetscCall(MatAssemblyEnd(aij->A,mode));
708 
709   /* determine if any processor has disassembled, if so we must
710      also disassemble ourself, in order that we may reassemble. */
711   /*
712      if nonzero structure of submatrix B cannot change then we know that
713      no processor disassembled thus we can skip this stuff
714   */
715   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
716     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
717     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
718       PetscCall(MatDisAssemble_MPIAIJ(mat));
719     }
720   }
721   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
722     PetscCall(MatSetUpMultiply_MPIAIJ(mat));
723   }
724   PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
725 #if defined(PETSC_HAVE_DEVICE)
726   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
727 #endif
728   PetscCall(MatAssemblyBegin(aij->B,mode));
729   PetscCall(MatAssemblyEnd(aij->B,mode));
730 
731   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
732 
733   aij->rowvalues = NULL;
734 
735   PetscCall(VecDestroy(&aij->diag));
736 
737   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
738   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
739     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
740     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
741   }
742 #if defined(PETSC_HAVE_DEVICE)
743   mat->offloadmask = PETSC_OFFLOAD_BOTH;
744 #endif
745   PetscFunctionReturn(0);
746 }
747 
748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
749 {
750   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
751 
752   PetscFunctionBegin;
753   PetscCall(MatZeroEntries(l->A));
754   PetscCall(MatZeroEntries(l->B));
755   PetscFunctionReturn(0);
756 }
757 
/* Zero the given global rows of the parallel matrix, optionally placing diag on the
   diagonal and fixing the right-hand side b so that the solution keeps x in those rows.
   rows[] may contain rows owned by any rank; they are first mapped to local indices. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;                 /* nonzero states of the two blocks before zeroing */
  PetscInt        *lrows;                  /* locally owned rows to zero (local indices) */
  PetscInt         r, len;
  PetscBool        cong, lch, gch;         /* congruent layouts; local/global "state changed" flags */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* set b_i = diag*x_i on each zeroed local row; only meaningful when row and
       column partitions match, hence the check */
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block, so zero
       A with diag and B with 0 */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;                 /* saved "nonew" settings, restored at the end */
    PetscBool  nnzA, nnzB;                 /* keepnonzeropattern flags of the blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;                     /* temporarily permit new nonzero locations */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;                     /* temporarily permit new nonzero locations */
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries one by one; skip rows beyond the column range
       of a non-square matrix where no diagonal entry exists */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;                    /* restore original insertion policy */
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;              /* bump the parallel state if any rank's pattern changed */
  PetscFunctionReturn(0);
}
831 
/* Zero the given global rows AND the matching columns, optionally placing diag on the
   diagonal and updating b so the solution keeps x in the zeroed rows.  The rows/columns
   may be owned by any rank; an SF maps them to their owners first. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;          /* global/ghosted masks marking zeroed columns */
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;  /* -1 == "not requested"; rows[] entries are >= 0 */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* LOR of nonnegative row numbers into lrows (initialized to -1) leaves
     lrows[r] >= 0 exactly on the owned rows that some rank requested */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;  /* mark zeroed rows; scatter spreads marks to ghost columns */
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x over so b can be corrected for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with entries are stored; ridx maps back to true row */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];   /* NOTE(review): reuses PetscMPIInt n for a row length — assumes it fits; confirm */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {   /* column was zeroed somewhere: drop entry, fix rhs */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscInt       nt;
955   VecScatter     Mvctx = a->Mvctx;
956 
957   PetscFunctionBegin;
958   PetscCall(VecGetLocalSize(xx,&nt));
959   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
960   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
961   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
962   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
963   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970 
971   PetscFunctionBegin;
972   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   VecScatter     Mvctx = a->Mvctx;
980 
981   PetscFunctionBegin;
982   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
983   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
984   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
985   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   /* do nondiagonal part */
995   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996   /* do local part */
997   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
998   /* add partial results together */
999   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1000   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1005 {
1006   MPI_Comm       comm;
1007   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1008   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1009   IS             Me,Notme;
1010   PetscInt       M,N,first,last,*notme,i;
1011   PetscBool      lf;
1012   PetscMPIInt    size;
1013 
1014   PetscFunctionBegin;
1015   /* Easy test: symmetric diagonal block */
1016   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1017   PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
1018   PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
1019   if (!*f) PetscFunctionReturn(0);
1020   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1021   PetscCallMPI(MPI_Comm_size(comm,&size));
1022   if (size == 1) PetscFunctionReturn(0);
1023 
1024   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1025   PetscCall(MatGetSize(Amat,&M,&N));
1026   PetscCall(MatGetOwnershipRange(Amat,&first,&last));
1027   PetscCall(PetscMalloc1(N-last+first,&notme));
1028   for (i=0; i<first; i++) notme[i] = i;
1029   for (i=last; i<M; i++) notme[i-last+first] = i;
1030   PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
1031   PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
1032   PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
1033   Aoff = Aoffs[0];
1034   PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
1035   Boff = Boffs[0];
1036   PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
1037   PetscCall(MatDestroyMatrices(1,&Aoffs));
1038   PetscCall(MatDestroyMatrices(1,&Boffs));
1039   PetscCall(ISDestroy(&Me));
1040   PetscCall(ISDestroy(&Notme));
1041   PetscCall(PetscFree(notme));
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscFunctionBegin;
1048   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059   /* do local part */
1060   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1061   /* add partial results together */
1062   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1063   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   PetscCall(MatGetDiagonal(a->A,v));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085 
1086   PetscFunctionBegin;
1087   PetscCall(MatScale(a->A,aa));
1088   PetscCall(MatScale(a->B,aa));
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1093 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1094 {
1095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1096 
1097   PetscFunctionBegin;
1098   PetscCall(PetscSFDestroy(&aij->coo_sf));
1099   PetscCall(PetscFree(aij->Aperm1));
1100   PetscCall(PetscFree(aij->Bperm1));
1101   PetscCall(PetscFree(aij->Ajmap1));
1102   PetscCall(PetscFree(aij->Bjmap1));
1103 
1104   PetscCall(PetscFree(aij->Aimap2));
1105   PetscCall(PetscFree(aij->Bimap2));
1106   PetscCall(PetscFree(aij->Aperm2));
1107   PetscCall(PetscFree(aij->Bperm2));
1108   PetscCall(PetscFree(aij->Ajmap2));
1109   PetscCall(PetscFree(aij->Bjmap2));
1110 
1111   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1112   PetscCall(PetscFree(aij->Cperm1));
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1117 {
1118   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1119 
1120   PetscFunctionBegin;
1121 #if defined(PETSC_USE_LOG)
1122   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1123 #endif
1124   PetscCall(MatStashDestroy_Private(&mat->stash));
1125   PetscCall(VecDestroy(&aij->diag));
1126   PetscCall(MatDestroy(&aij->A));
1127   PetscCall(MatDestroy(&aij->B));
1128 #if defined(PETSC_USE_CTABLE)
1129   PetscCall(PetscTableDestroy(&aij->colmap));
1130 #else
1131   PetscCall(PetscFree(aij->colmap));
1132 #endif
1133   PetscCall(PetscFree(aij->garray));
1134   PetscCall(VecDestroy(&aij->lvec));
1135   PetscCall(VecScatterDestroy(&aij->Mvctx));
1136   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1137   PetscCall(PetscFree(aij->ld));
1138 
1139   /* Free COO */
1140   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1141 
1142   PetscCall(PetscFree(mat->data));
1143 
1144   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1145   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1146 
1147   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1148   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1149   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1150   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1151   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1153   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1157 #if defined(PETSC_HAVE_CUDA)
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1159 #endif
1160 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1162 #endif
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1164 #if defined(PETSC_HAVE_ELEMENTAL)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_SCALAPACK)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1169 #endif
1170 #if defined(PETSC_HAVE_HYPRE)
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1173 #endif
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1180 #if defined(PETSC_HAVE_MKL_SPARSE)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1182 #endif
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1188   PetscFunctionReturn(0);
1189 }
1190 
/* Write the parallel matrix to a binary viewer in the PETSc native format:
   header (classid, M, N, global nz), per-row lengths, global column indices
   in ascending order per row, then the matching nonzero values. */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;   /* local->global column map for the off-diagonal block */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;                        /* local nonzero count over both blocks */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* sum the local nz into header[3] on rank 0, which performs the actual write */
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths  */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  /* per row: off-diagonal columns left of the diagonal block, then the diagonal
     block, then the remaining off-diagonal columns — keeps each row sorted */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;      /* off-diagonal columns never equal an owned column, so > is the split */
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;        /* shift diagonal-block columns to global numbering */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  /* values are interleaved in exactly the same order as the indices above */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
1265 
1266 #include <petscdraw.h>
/* View the parallel matrix on an ASCII, draw, socket, or binary viewer.
   Info-style ASCII formats are printed in place; all other formats fall
   through to gathering the whole matrix onto rank 0 and viewing it there. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather each rank's local nonzero count and report min/avg/max */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank storage summary plus the scatter used in MatMult */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — iascii was already handled by
       the first branch of this chain; left in place to preserve behavior */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row/column; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1394 
1395 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1396 {
1397   PetscBool      iascii,isdraw,issocket,isbinary;
1398 
1399   PetscFunctionBegin;
1400   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1401   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1402   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1403   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1404   if (iascii || isdraw || isbinary || issocket) {
1405     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only the "local" sweep variants are supported in parallel: each outer
   iteration applies SOR to the diagonal block mat->A, with the off-diagonal
   block mat->B folded in through a modified right-hand side
   bb1 = bb - B*x, where off-process values of x are gathered into mat->lvec
   via the scatter context mat->Mvctx.

   Parameters mirror MatSOR(): bb is the right-hand side, omega the relaxation
   factor, flag the MatSORType, fshift a diagonal shift, its/lits the outer and
   local iteration counts; xx is both input (initial guess) and output.
   A truly global (non-local) sweep request raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* modified rhs, allocated only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply only the upper-triangular part of the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero (note ~flag & ... : the ZERO_INITIAL_GUESS bit is NOT set),
     or when the Eisenstat trick is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution since x == 0 */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather off-process entries of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat trick: one backward solve, a diagonally-scaled correction of
       the rhs, then one forward solve; results are accumulated into xx */
    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the global diagonal for the scaling below */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error (e.g. zero pivot) detected in the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1508 
/*
   MatPermute_MPIAIJ - produce B = P^T A Q for (possibly parallel) row and
   column permutations rowp and colp.

   The permutations are inverted with star forests (PetscSF) so each rank
   learns the new global row/column index of every row/column it owns, the
   nonzero counts per destination row are computed for preallocation, and the
   entries are then moved with MatSetValues() (which handles the off-process
   communication during assembly).
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  /* gcols are the global indices of the off-diagonal block's compressed columns */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count the diagonal/off-diagonal nonzeros each of my rows contributes to
     its destination row (dnnz/onnz), then send the counts to the destination
     owners (tdnnz/tonnz) for preallocation */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never set in this function, so this destroy is dead code — verify against history */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1614 
1615 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1616 {
1617   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1618 
1619   PetscFunctionBegin;
1620   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1621   if (ghosts) *ghosts = aij->garray;
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1626 {
1627   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1628   Mat            A    = mat->A,B = mat->B;
1629   PetscLogDouble isend[5],irecv[5];
1630 
1631   PetscFunctionBegin;
1632   info->block_size = 1.0;
1633   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1634 
1635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1636   isend[3] = info->memory;  isend[4] = info->mallocs;
1637 
1638   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1639 
1640   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1641   isend[3] += info->memory;  isend[4] += info->mallocs;
1642   if (flag == MAT_LOCAL) {
1643     info->nz_used      = isend[0];
1644     info->nz_allocated = isend[1];
1645     info->nz_unneeded  = isend[2];
1646     info->memory       = isend[3];
1647     info->mallocs      = isend[4];
1648   } else if (flag == MAT_GLOBAL_MAX) {
1649     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   } else if (flag == MAT_GLOBAL_SUM) {
1657     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   }
1665   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1666   info->fill_ratio_needed = 0;
1667   info->factor_mallocs    = 0;
1668   PetscFunctionReturn(0);
1669 }
1670 
/*
   MatSetOption_MPIAIJ - set a matrix option, forwarding to the sequential
   diagonal (a->A) and off-diagonal (a->B) blocks where the option affects
   their storage/assembly behavior, and recording purely parallel options
   (such as MAT_IGNORE_OFF_PROC_ENTRIES) locally.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply pass through to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered locally (used by MatSetValues) and forwarded to the blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops entries destined for other ranks */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1721 
/*
   MatGetRow_MPIAIJ - return one locally-owned row of the matrix, with column
   indices in global numbering and sorted in increasing order.

   The row is assembled by merging the row of the diagonal block A (whose
   columns lie in [cstart,cend)) with the row of the off-diagonal block B
   (whose compressed columns are mapped to global indices through garray).
   The merged values/indices are written into work arrays (mat->rowvalues /
   mat->rowindices) that are lazily allocated for the longest local row and
   reused; hence only one row may be "active" at a time (see getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* B's columns split into those below cstart (first imark of them) and
         those at/after cend; A's columns all lie in between */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1798 
1799 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1800 {
1801   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1802 
1803   PetscFunctionBegin;
1804   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1805   aij->getrowactive = PETSC_FALSE;
1806   PetscFunctionReturn(0);
1807 }
1808 
/*
   MatNorm_MPIAIJ - compute a matrix norm of an MPIAIJ matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the two-norm is not available.  On a single rank the
   computation is delegated to the sequential diagonal block; otherwise the
   raw CSR value arrays of both blocks are walked directly and the partial
   results are combined with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced, then square root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, reduce, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps back to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across ranks, so a local row max + MPI_MAX suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1878 
/*
   MatTranspose_MPIAIJ - form the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or an in-place request, *matout == A) the result
   matrix is created with exact preallocation computed from the CSR structure
   of both blocks (off-diagonal counts are accumulated on the owning ranks via
   a PetscSF reduction).  The diagonal block is transposed locally and fast
   (direct MAT_REUSE_MATRIX MatTranspose, no MatSetValues), while the
   off-diagonal block's entries are communicated through MatSetValues during
   assembly.  For MAT_INPLACE_MATRIX the freshly built B replaces A through
   MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    /* column counts of A's diagonal block become row counts of B's */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    /* reduce the per-ghost-column counts onto the ranks that own those columns */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  /* the off-diagonal entries go to other ranks: translate compressed column
     indices to global numbering and ship them with MatSetValues (transposed:
     one column of B per local row of a->B) */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B and destroy the shell of B */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1966 
/*
   MatDiagonalScale_MPIAIJ - scale the matrix as diag(ll) * mat * diag(rr).

   ll (left scaling) is purely local to each rank's rows.  rr (right scaling)
   must be gathered for the off-diagonal block's ghost columns, so the scatter
   of rr into aij->lvec is started first and completed only after the local
   scalings, overlapping communication with computation.  Either vector may
   be NULL to skip that side.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local) */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}
1996 
1997 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1998 {
1999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2000 
2001   PetscFunctionBegin;
2002   PetscCall(MatSetUnfactored(a->A));
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2007 {
2008   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2009   Mat            a,b,c,d;
2010   PetscBool      flg;
2011 
2012   PetscFunctionBegin;
2013   a = matA->A; b = matA->B;
2014   c = matB->A; d = matB->B;
2015 
2016   PetscCall(MatEqual(a,c,&flg));
2017   if (flg) {
2018     PetscCall(MatEqual(b,d,&flg));
2019   }
2020   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2025 {
2026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2027   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2028 
2029   PetscFunctionBegin;
2030   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2031   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2032     /* because of the column compression in the off-processor part of the matrix a->B,
2033        the number of columns in a->B and b->B may be different, hence we cannot call
2034        the MatCopy() directly on the two parts. If need be, we can provide a more
2035        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2036        then copying the submatrices */
2037     PetscCall(MatCopy_Basic(A,B,str));
2038   } else {
2039     PetscCall(MatCopy(a->A,b->A,str));
2040     PetscCall(MatCopy(a->B,b->B,str));
2041   }
2042   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2043   PetscFunctionReturn(0);
2044 }
2045 
/*
   MatSetUp_MPIAIJ - default setup for an MPIAIJ matrix: perform
   preallocation with default (heuristic) per-row nonzero counts.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2052 
2053 /*
2054    Computes the number of nonzeros per row needed for preallocation when X and Y
2055    have different nonzero structure.
2056 */
2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2058 {
2059   PetscInt       i,j,k,nzx,nzy;
2060 
2061   PetscFunctionBegin;
2062   /* Set the number of nonzeros in the new matrix */
2063   for (i=0; i<m; i++) {
2064     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2065     nzx = xi[i+1] - xi[i];
2066     nzy = yi[i+1] - yi[i];
2067     nnz[i] = 0;
2068     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2069       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2070       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2071       nnz[i]++;
2072     }
2073     for (; k<nzy; k++) nnz[i]++;
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2080 {
2081   PetscInt       m = Y->rmap->N;
2082   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2083   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2084 
2085   PetscFunctionBegin;
2086   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2087   PetscFunctionReturn(0);
2088 }
2089 
/*
   MatAXPY_MPIAIJ - compute Y = Y + a*X.

   SAME_NONZERO_PATTERN is applied block-by-block; SUBSET_NONZERO_PATTERN
   (X's nonzeros are a subset of Y's) uses the generic path; otherwise a new
   matrix with the union structure is preallocated, filled, and swapped into
   Y via MatHeaderMerge() so Y's handle stays valid for callers.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    /* different patterns: build a matrix with the union structure */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    /* diagonal blocks share column numbering; off-diagonal blocks need the garray maps */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* replace Y's contents with B in place */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2120 
2121 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2122 
2123 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2124 {
2125   PetscFunctionBegin;
2126   if (PetscDefined(USE_COMPLEX)) {
2127     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2128 
2129     PetscCall(MatConjugate_SeqAIJ(aij->A));
2130     PetscCall(MatConjugate_SeqAIJ(aij->B));
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2138 
2139   PetscFunctionBegin;
2140   PetscCall(MatRealPart(a->A));
2141   PetscCall(MatRealPart(a->B));
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2146 {
2147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2148 
2149   PetscFunctionBegin;
2150   PetscCall(MatImaginaryPart(a->A));
2151   PetscCall(MatImaginaryPart(a->B));
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2156 {
2157   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2158   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2159   PetscScalar       *va,*vv;
2160   Vec               vB,vA;
2161   const PetscScalar *vb;
2162 
2163   PetscFunctionBegin;
2164   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2165   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2166 
2167   PetscCall(VecGetArrayWrite(vA,&va));
2168   if (idx) {
2169     for (i=0; i<m; i++) {
2170       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2171     }
2172   }
2173 
2174   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2175   PetscCall(PetscMalloc1(m,&idxb));
2176   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2177 
2178   PetscCall(VecGetArrayWrite(v,&vv));
2179   PetscCall(VecGetArrayRead(vB,&vb));
2180   for (i=0; i<m; i++) {
2181     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2182       vv[i] = vb[i];
2183       if (idx) idx[i] = a->garray[idxb[i]];
2184     } else {
2185       vv[i] = va[i];
2186       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2187         idx[i] = a->garray[idxb[i]];
2188     }
2189   }
2190   PetscCall(VecRestoreArrayWrite(vA,&vv));
2191   PetscCall(VecRestoreArrayWrite(vA,&va));
2192   PetscCall(VecRestoreArrayRead(vB,&vb));
2193   PetscCall(PetscFree(idxb));
2194   PetscCall(VecDestroy(&vA));
2195   PetscCall(VecDestroy(&vB));
2196   PetscFunctionReturn(0);
2197 }
2198 
/* Compute, for each local row, the entry of minimum absolute value and (optionally) its global
   column index.  Results from the diagonal block mat->A and the off-diagonal block mat->B are
   merged; columns of B that are not stored explicitly are implicit zeros and therefore candidates
   for the minimum |value|, so the first such "hole" in each row must be located. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* maps compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's array so the sequential routine writes directly into it */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every row is all implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j; /* ba and bj are advanced row by row in the loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: the row has at least one implicit 0.0, so the minimum |value| is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column against the loop index j — presumably relies on
         the compressed columns of B being sorted; confirm against MatAssemblyEnd_MPIAIJ */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller |value| */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal per-row minima; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2306 
/* Compute, for each local row, the minimum entry (by real part) and (optionally) its global
   column index.  Results from the diagonal block mat->A and the off-diagonal block mat->B are
   merged; columns of B that are not stored explicitly are implicit zeros and may be the row
   minimum, so the first such "hole" in each row must be located. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* maps compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's array so the sequential routine writes directly into it */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of min (PETSC_MAX_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j; /* ba and bj are advanced row by row in the loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: the row has an implicit 0.0, so its minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column against the loop index j — presumably relies on
         the compressed columns of B being sorted; confirm against MatAssemblyEnd_MPIAIJ */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller value (compared by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal per-row minima; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2414 
/* Compute, for each local row, the maximum entry (by real part) and (optionally) its global
   column index.  Results from the diagonal block mat->A and the off-diagonal block mat->B are
   merged; columns of B that are not stored explicitly are implicit zeros and may be the row
   maximum, so the first such "hole" in each row must be located. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* maps compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap v's array so the sequential routine writes directly into it */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of max (PETSC_MIN_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j; /* ba and bj are advanced row by row in the loop below */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): compares the global column against the loop index j — presumably relies on
         the compressed columns of B being sorted; confirm against MatAssemblyEnd_MPIAIJ */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a larger value (compared by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal per-row maxima; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2522 
2523 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2524 {
2525   Mat            *dummy;
2526 
2527   PetscFunctionBegin;
2528   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2529   *newmat = *dummy;
2530   PetscCall(PetscFree(dummy));
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2535 {
2536   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2537 
2538   PetscFunctionBegin;
2539   PetscCall(MatInvertBlockDiagonal(a->A,values));
2540   A->factorerrortype = a->A->factorerrortype;
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2545 {
2546   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2547 
2548   PetscFunctionBegin;
2549   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2550   PetscCall(MatSetRandom(aij->A,rctx));
2551   if (x->assembled) {
2552     PetscCall(MatSetRandom(aij->B,rctx));
2553   } else {
2554     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2555   }
2556   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2557   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2562 {
2563   PetscFunctionBegin;
2564   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2565   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2566   PetscFunctionReturn(0);
2567 }
2568 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix registered one; no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2587 
2588 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2589 {
2590   PetscBool            sc = PETSC_FALSE,flg;
2591 
2592   PetscFunctionBegin;
2593   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2594   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2595   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2596   if (flg) {
2597     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2598   }
2599   PetscOptionsHeadEnd();
2600   PetscFunctionReturn(0);
2601 }
2602 
/* Add a*I to the matrix.  Ensures the diagonal block has room for at least one entry
   per row before delegating to the generic shift. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: one diagonal entry per row is sufficient for the shift */
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    /* preallocated but empty diagonal block: re-preallocate it, saving and restoring
       nonew across the call so the caller's new-nonzero policy is preserved */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}
2619 
2620 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2621 {
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623 
2624   PetscFunctionBegin;
2625   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2626   PetscCall(MatMissingDiagonal(a->A,missing,d));
2627   if (d) {
2628     PetscInt rstart;
2629     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2630     *d += rstart;
2631 
2632   }
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2637 {
2638   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2639 
2640   PetscFunctionBegin;
2641   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ.  Slot positions must follow the ordering of struct _MatOps
   (see petsc/private/matimpl.h); the numeric comments mark every fifth slot.  NULL means the
   operation falls back to the generic implementation or is unsupported. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2795 
2796 /* ----------------------------------------------------------------------------------------*/
2797 
2798 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2799 {
2800   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2801 
2802   PetscFunctionBegin;
2803   PetscCall(MatStoreValues(aij->A));
2804   PetscCall(MatStoreValues(aij->B));
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2809 {
2810   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2811 
2812   PetscFunctionBegin;
2813   PetscCall(MatRetrieveValues(aij->A));
2814   PetscCall(MatRetrieveValues(aij->B));
2815   PetscFunctionReturn(0);
2816 }
2817 
/* Preallocate the MPIAIJ matrix: d_nz/d_nnz for the diagonal block, o_nz/o_nnz for the
   off-diagonal block.  May be called repeatedly; the off-diagonal block and its support
   structures are rebuilt from scratch each time. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the column map and scatter built for a previous nonzero pattern */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* uniprocessor case has no off-diagonal block, so give it zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes, so it is created only once */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2861 
/* Restore the matrix to its freshly-preallocated state: keep the existing preallocation
   pattern of both blocks but discard assembled data and communication structures. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* the column map, ghost list, and scatter are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2888 
/* Duplicate an MPIAIJ matrix: create a new matrix of the same type and sizes, copy the
   bookkeeping fields, and duplicate the two local blocks (values copied or not per cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the ghost-column list (garray), if it has been built */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  /* carry over composed functions (e.g. conversion and preallocation methods) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2954 
2955 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2956 {
2957   PetscBool      isbinary, ishdf5;
2958 
2959   PetscFunctionBegin;
2960   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2961   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2962   /* force binary viewer to load .info file if it has not yet done so */
2963   PetscCall(PetscViewerSetUp(viewer));
2964   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2965   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2966   if (isbinary) {
2967     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2968   } else if (ishdf5) {
2969 #if defined(PETSC_HAVE_HDF5)
2970     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2971 #else
2972     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2973 #endif
2974   } else {
2975     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2976   }
2977   PetscFunctionReturn(0);
2978 }
2979 
/*
   MatLoad_MPIAIJ_Binary - reads an MPIAIJ matrix from a PETSc binary viewer.

   On-disk layout read here: a 4-entry header (MAT_FILE_CLASSID, M, N, nz),
   then the per-row nonzero counts, then all column indices, then all values.
   The collective PetscViewerBinaryReadAll() calls distribute the data
   according to the matrix's row layout.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz flags a non-AIJ on-disk representation this loader cannot parse */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* in-place prefix sum converts per-row counts into local CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the summed row lengths must account for every stored nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3026 
3027 /* Not scalable because of ISAllGather() unless getting all columns. */
3028 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3029 {
3030   IS             iscol_local;
3031   PetscBool      isstride;
3032   PetscMPIInt    lisstride=0,gisstride;
3033 
3034   PetscFunctionBegin;
3035   /* check if we are grabbing all columns*/
3036   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3037 
3038   if (isstride) {
3039     PetscInt  start,len,mstart,mlen;
3040     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3041     PetscCall(ISGetLocalSize(iscol,&len));
3042     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3043     if (mstart == start && mlen-mstart == len) lisstride = 1;
3044   }
3045 
3046   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3047   if (gisstride) {
3048     PetscInt N;
3049     PetscCall(MatGetSize(mat,NULL,&N));
3050     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3051     PetscCall(ISSetIdentity(iscol_local));
3052     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3053   } else {
3054     PetscInt cbs;
3055     PetscCall(ISGetBlockSize(iscol,&cbs));
3056     PetscCall(ISAllGather(iscol,&iscol_local));
3057     PetscCall(ISSetBlockSize(iscol_local,cbs));
3058   }
3059 
3060   *isseq = iscol_local;
3061   PetscFunctionReturn(0);
3062 }
3063 
3064 /*
3065  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3066  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3067 
3068  Input Parameters:
3069    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3072    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3073            i.e., mat->cstart <= iscol[i] < mat->cend
3074  Output Parameter:
3075    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3076    iscol_o - sequential column index set for retrieving mat->B
3077    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3078  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;       /* full-length column vectors used to broadcast selection info */
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  /* mark selected columns in x and record their submatrix column numbers in cmap */
  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: shift global row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec left at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller owns cmap1 (allocated with Bn entries; only the first count are used) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3175 
3176 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat
       (stashed on the submatrix by the MAT_INITIAL_MATRIX branch below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; Asub and Bsub are consumed by M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* keep only the iscol_o entries whose global column survives in asub->garray;
         both garray and subgarray are traversed in increasing order */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3270 
/*
   MatCreateSubMatrix_MPIAIJ - top-level submatrix extraction for MPIAIJ;
   chooses among three strategies depending on whether isrow/iscol have the
   same processor distribution as mat:
     - same row and column distribution -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
     - same row distribution only       -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
     - otherwise                        -> MatCreateSubMatrix_MPIAIJ_nonscalable()
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, the strategy is recovered from objects stashed on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices inside [start,end) implies same row distribution */
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the decision must be unanimous across the communicator */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the result so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3374 
3375 /*@C
3376      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3377          and "off-diagonal" part of the matrix in CSR format.
3378 
3379    Collective
3380 
3381    Input Parameters:
3382 +  comm - MPI communicator
3383 .  A - "diagonal" portion of matrix
3384 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3385 -  garray - global index of B columns
3386 
3387    Output Parameter:
3388 .   mat - the matrix, with input A as its local diagonal matrix
3389    Level: advanced
3390 
3391    Notes:
3392        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3393        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3394 
3395 .seealso: MatCreateMPIAIJWithSplitArrays()
3396 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat takes ownership of A */
  maij->A = A;

  /* translate B's local column indices to global ones via garray, in place */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* transfer array ownership from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3463 
3464 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3465 
/*
   Creates submat = mat[isrow,iscol] when isrow has the same processor
   distribution as mat. iscol_local is the sequentially gathered iscol
   (required sorted, may contain duplicates); it is only used for
   MAT_INITIAL_MATRIX and may be NULL on reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the objects stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep only columns present in garray
             (garray is traversed monotonically since iscol_local is sorted) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      /* iscol_sub/iscmap take ownership of idx/cmap1 */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the dlens allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* map Msub's local column numbers to submat global columns through cmap */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}
3674 
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns shortcut is only valid if it holds on EVERY rank, hence the logical AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* fetch the sequential submatrix cached on *newmat by a previous MAT_INITIAL_MATRIX call (composed below as "SubMatrix") */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible, giving the first n%size ranks one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of per-rank column counts yields this rank's "diagonal" column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;  /* olens shares the single allocation with dlens; freed together below */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* copy Mreuse row by row into the parallel matrix; jj/aa walk the CSR arrays in step */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3806 
3807 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3808 {
3809   PetscInt       m,cstart, cend,j,nnz,i,d;
3810   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3811   const PetscInt *JJ;
3812   PetscBool      nooffprocentries;
3813 
3814   PetscFunctionBegin;
3815   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3816 
3817   PetscCall(PetscLayoutSetUp(B->rmap));
3818   PetscCall(PetscLayoutSetUp(B->cmap));
3819   m      = B->rmap->n;
3820   cstart = B->cmap->rstart;
3821   cend   = B->cmap->rend;
3822   rstart = B->rmap->rstart;
3823 
3824   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3825 
3826   if (PetscDefined(USE_DEBUG)) {
3827     for (i=0; i<m; i++) {
3828       nnz = Ii[i+1]- Ii[i];
3829       JJ  = J + Ii[i];
3830       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3831       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3832       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3833     }
3834   }
3835 
3836   for (i=0; i<m; i++) {
3837     nnz     = Ii[i+1]- Ii[i];
3838     JJ      = J + Ii[i];
3839     nnz_max = PetscMax(nnz_max,nnz);
3840     d       = 0;
3841     for (j=0; j<nnz; j++) {
3842       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3843     }
3844     d_nnz[i] = d;
3845     o_nnz[i] = nnz - d;
3846   }
3847   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3848   PetscCall(PetscFree2(d_nnz,o_nnz));
3849 
3850   for (i=0; i<m; i++) {
3851     ii   = i + rstart;
3852     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3853   }
3854   nooffprocentries    = B->nooffprocentries;
3855   B->nooffprocentries = PETSC_TRUE;
3856   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3857   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3858   B->nooffprocentries = nooffprocentries;
3859 
3860   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3861   PetscFunctionReturn(0);
3862 }
3863 
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered under "MatMPIAIJSetPreallocationCSR_C";
     silently a no-op if B's type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3913 
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation registered under "MatMPIAIJSetPreallocation_C";
     silently a no-op if B's type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4051 
/*@
     MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain the matrix in standard
         CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL (no local rows); otherwise its first offset must be 0 */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* preallocation and value insertion happen in one pass over the CSR data */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4118 
4119 /*@
4120      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4121          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4122 
4123    Collective
4124 
4125    Input Parameters:
4126 +  mat - the matrix
4127 .  m - number of local rows (Cannot be PETSC_DECIDE)
4128 .  n - This value should be the same as the local size used in creating the
4129        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4130        calculated if N is given) For square matrices n is almost always m.
4131 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4132 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4133 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4134 .  J - column indices
4135 -  v - matrix values
4136 
4137    Level: intermediate
4138 
4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4140           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4141 @*/
4142 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4143 {
4144   PetscInt       cstart,nnz,i,j;
4145   PetscInt       *ld;
4146   PetscBool      nooffprocentries;
4147   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4148   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4149   PetscScalar    *ad,*ao;
4150   const PetscInt *Adi = Ad->i;
4151   PetscInt       ldi,Iii,md;
4152 
4153   PetscFunctionBegin;
4154   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4155   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4156   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4157   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4158 
4159   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4160   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4161   cstart = mat->cmap->rstart;
4162   if (!Aij->ld) {
4163     /* count number of entries below block diagonal */
4164     PetscCall(PetscCalloc1(m,&ld));
4165     Aij->ld = ld;
4166     for (i=0; i<m; i++) {
4167       nnz  = Ii[i+1]- Ii[i];
4168       j     = 0;
4169       while  (J[j] < cstart && j < nnz) {j++;}
4170       J    += nnz;
4171       ld[i] = j;
4172     }
4173   } else {
4174     ld = Aij->ld;
4175   }
4176 
4177   for (i=0; i<m; i++) {
4178     nnz  = Ii[i+1]- Ii[i];
4179     Iii  = Ii[i];
4180     ldi  = ld[i];
4181     md   = Adi[i+1]-Adi[i];
4182     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4183     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4184     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4185     ad  += md;
4186     ao  += nnz - md;
4187   }
4188   nooffprocentries      = mat->nooffprocentries;
4189   mat->nooffprocentries = PETSC_TRUE;
4190   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4191   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4192   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4193   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4194   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4195   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4196   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4197   mat->nooffprocentries = nooffprocentries;
4198   PetscFunctionReturn(0);
4199 }
4200 
4201 /*@C
4202    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4203    (the default parallel PETSc format).  For good matrix assembly performance
4204    the user should preallocate the matrix storage by setting the parameters
4205    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4206    performance can be increased by more than a factor of 50.
4207 
4208    Collective
4209 
4210    Input Parameters:
4211 +  comm - MPI communicator
4212 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4213            This value should be the same as the local size used in creating the
4214            y vector for the matrix-vector product y = Ax.
4215 .  n - This value should be the same as the local size used in creating the
4216        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4217        calculated if N is given) For square matrices n is almost always m.
4218 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4219 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4220 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4221            (same value is used for all local rows)
4222 .  d_nnz - array containing the number of nonzeros in the various rows of the
4223            DIAGONAL portion of the local submatrix (possibly different for each row)
4224            or NULL, if d_nz is used to specify the nonzero structure.
4225            The size of this array is equal to the number of local rows, i.e 'm'.
4226 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4227            submatrix (same value is used for all local rows).
4228 -  o_nnz - array containing the number of nonzeros in the various rows of the
4229            OFF-DIAGONAL portion of the local submatrix (possibly different for
4230            each row) or NULL, if o_nz is used to specify the nonzero
4231            structure. The size of this array is equal to the number
4232            of local rows, i.e 'm'.
4233 
4234    Output Parameter:
4235 .  A - the matrix
4236 
4237    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4238    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4239    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4240 
4241    Notes:
4242    If the *_nnz parameter is given then the *_nz parameter is ignored
4243 
4244    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4245    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4246    storage requirements for this matrix.
4247 
4248    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4249    processor than it must be used on all processors that share the object for
4250    that argument.
4251 
4252    The user MUST specify either the local or global matrix dimensions
4253    (possibly both).
4254 
4255    The parallel matrix is partitioned across processors such that the
4256    first m0 rows belong to process 0, the next m1 rows belong to
4257    process 1, the next m2 rows belong to process 2 etc.. where
4258    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4259    values corresponding to [m x N] submatrix.
4260 
4261    The columns are logically partitioned with the n0 columns belonging
4262    to 0th partition, the next n1 columns belonging to the next
4263    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4264 
4265    The DIAGONAL portion of the local submatrix on any given processor
4266    is the submatrix corresponding to the rows and columns m,n
4267    corresponding to the given processor. i.e diagonal matrix on
4268    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4269    etc. The remaining portion of the local submatrix [m x (N-n)]
4270    constitute the OFF-DIAGONAL portion. The example below better
4271    illustrates this concept.
4272 
4273    For a square global matrix we define each processor's diagonal portion
4274    to be its local rows and the corresponding columns (a square submatrix);
4275    each processor's off-diagonal portion encompasses the remainder of the
4276    local matrix (a rectangular submatrix).
4277 
4278    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4279 
4280    When calling this routine with a single process communicator, a matrix of
4281    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4282    type of communicator, use the construction mechanism
4283 .vb
4284      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4285 .ve
4286 
4287 $     MatCreate(...,&A);
4288 $     MatSetType(A,MATMPIAIJ);
4289 $     MatSetSizes(A, m,n,M,N);
4290 $     MatMPIAIJSetPreallocation(A,...);
4291 
4292    By default, this format uses inodes (identical nodes) when possible.
4293    We search for consecutive rows with the same nonzero structure, thereby
4294    reusing matrix information to achieve increased efficiency.
4295 
4296    Options Database Keys:
4297 +  -mat_no_inode  - Do not use inodes
4298 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4299 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4300         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4301         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4302 
4303    Example usage:
4304 
4305    Consider the following 8x8 matrix with 34 non-zero values, that is
4306    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4307    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4308    as follows
4309 
4310 .vb
4311             1  2  0  |  0  3  0  |  0  4
4312     Proc0   0  5  6  |  7  0  0  |  8  0
4313             9  0 10  | 11  0  0  | 12  0
4314     -------------------------------------
4315            13  0 14  | 15 16 17  |  0  0
4316     Proc1   0 18  0  | 19 20 21  |  0  0
4317             0  0  0  | 22 23  0  | 24  0
4318     -------------------------------------
4319     Proc2  25 26 27  |  0  0 28  | 29  0
4320            30  0  0  | 31 32 33  |  0 34
4321 .ve
4322 
4323    This can be represented as a collection of submatrices as
4324 
4325 .vb
4326       A B C
4327       D E F
4328       G H I
4329 .ve
4330 
4331    Where the submatrices A,B,C are owned by proc0, D,E,F are
4332    owned by proc1, G,H,I are owned by proc2.
4333 
4334    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4335    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4336    The 'M','N' parameters are 8,8, and have the same values on all procs.
4337 
4338    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4339    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4340    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4341    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4342    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4343    matrix, and [DF] as another SeqAIJ matrix.
4344 
4345    When d_nz, o_nz parameters are specified, d_nz storage elements are
4346    allocated for every row of the local diagonal submatrix, and o_nz
4347    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4348    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4349    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4350    In this case, the values of d_nz,o_nz are
4351 .vb
4352      proc0 : d_nz = 2, o_nz = 2
4353      proc1 : d_nz = 3, o_nz = 2
4354      proc2 : d_nz = 1, o_nz = 4
4355 .ve
4356    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4357    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4358    for proc2. i.e. we are using 12+15+10=37 storage locations to store
4359    34 values.
4360 
4361    When d_nnz, o_nnz parameters are specified, the storage is specified
4362    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4363    In the above case the values for d_nnz,o_nnz are
4364 .vb
4365      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4366      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4367      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4368 .ve
4369    Here the space allocated is sum of all the above values i.e 34, and
4370    hence pre-allocation is perfect.
4371 
4372    Level: intermediate
4373 
4374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4375           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4376 @*/
4377 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4378 {
4379   PetscMPIInt    size;
4380 
4381   PetscFunctionBegin;
4382   PetscCall(MatCreate(comm,A));
4383   PetscCall(MatSetSizes(*A,m,n,M,N));
4384   PetscCallMPI(MPI_Comm_size(comm,&size));
4385   if (size > 1) {
4386     PetscCall(MatSetType(*A,MATMPIAIJ));
4387     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4388   } else {
4389     PetscCall(MatSetType(*A,MATSEQAIJ));
4390     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4391   }
4392   PetscFunctionReturn(0);
4393 }
4394 
4395 /*@C
4396   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4397 
4398   Not collective
4399 
4400   Input Parameter:
4401 . A - The MPIAIJ matrix
4402 
4403   Output Parameters:
4404 + Ad - The local diagonal block as a SeqAIJ matrix
4405 . Ao - The local off-diagonal block as a SeqAIJ matrix
4406 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4407 
4408   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4409   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4410   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4411   local column numbers to global column numbers in the original matrix.
4412 
4413   Level: intermediate
4414 
4415 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4416 @*/
4417 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4418 {
4419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4420   PetscBool      flg;
4421 
4422   PetscFunctionBegin;
4423   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4424   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4425   if (Ad)     *Ad     = a->A;
4426   if (Ao)     *Ao     = a->B;
4427   if (colmap) *colmap = a->garray;
4428   PetscFunctionReturn(0);
4429 }
4430 
/* Stacks each rank's sequential matrix inmat on top of the others (in rank order, via
   MPI_Scan of the local row counts) to form one parallel matrix *outmat; n is the local
   column count (or PETSC_DECIDE). With MAT_REUSE_MATRIX, *outmat must come from a prior
   MAT_INITIAL_MATRIX call. Collective on comm. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first global row owned by this rank = sum of the row counts of lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros of each local row for preallocation */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocations are issued; only the one matching the actual (Seq vs MPI) type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    /* every rank inserts only its own rows, so skip the off-process communication in assembly */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy this rank's rows into its owned block of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4484 
4485 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4486 {
4487   PetscMPIInt       rank;
4488   PetscInt          m,N,i,rstart,nnz;
4489   size_t            len;
4490   const PetscInt    *indx;
4491   PetscViewer       out;
4492   char              *name;
4493   Mat               B;
4494   const PetscScalar *values;
4495 
4496   PetscFunctionBegin;
4497   PetscCall(MatGetLocalSize(A,&m,NULL));
4498   PetscCall(MatGetSize(A,NULL,&N));
4499   /* Should this be the type of the diagonal block of A? */
4500   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4501   PetscCall(MatSetSizes(B,m,N,m,N));
4502   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4503   PetscCall(MatSetType(B,MATSEQAIJ));
4504   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4505   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4506   for (i=0; i<m; i++) {
4507     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4508     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4509     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4510   }
4511   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4512   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4513 
4514   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4515   PetscCall(PetscStrlen(outfile,&len));
4516   PetscCall(PetscMalloc1(len+6,&name));
4517   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4518   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4519   PetscCall(PetscFree(name));
4520   PetscCall(MatView(B,out));
4521   PetscCall(PetscViewerDestroy(&out));
4522   PetscCall(MatDestroy(&B));
4523   PetscFunctionReturn(0);
4524 }
4525 
/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached to the
   parallel matrix by MatCreateMPIAIJSumSeqAIJSymbolic(); frees all its buffers and the
   struct itself. Safe to call with data == NULL. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri[0]/buf_rj[0] hold the storage for all received messages (presumably allocated
     contiguously by PetscPostIrecvInt() — hence only element 0 is freed) */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4548 
4549 #include <../src/mat/utils/freespace.h>
4550 #include <petscbt.h>
4551 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the values of mpimat (whose nonzero
   structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) by summing the entries of the
   per-rank sequential matrices seqmat. Rows owned by other ranks are shipped to their
   owners with point-to-point messages, then merged into each local row. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the communication structure saved by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of all rows owned by [proc] are contiguous in aa, starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: both bj_i and aj are sorted, so advance j until the columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] holds the storage for all received value messages */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4671 
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of the
   parallel matrix that is the sum of the per-rank sequential matrices seqmat, preallocates
   it, and attaches a Mat_Merge_SeqsToMPI container holding the communication/merge data
   that MatCreateMPIAIJSumSeqAIJNumeric() reuses. The result is NOT assembled. */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* no self-message: this rank's rows are merged locally */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows; the i-structure message stores 2 entries per such row */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of all rows owned by [proc] are contiguous in aj, starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  /* gather the column indices collected in free space into the contiguous bj */
  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4921 
4922 /*@C
4923       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4924                  matrices from each processor
4925 
4926     Collective
4927 
4928    Input Parameters:
4929 +    comm - the communicator for the parallel matrix
4930 .    seqmat - the input sequential matrix
4931 .    m - number of local rows (or PETSC_DECIDE)
4932 .    n - number of local columns (or PETSC_DECIDE)
4933 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4934 
4935    Output Parameter:
4936 .    mpimat - the parallel matrix generated
4937 
4938     Level: advanced
4939 
4940    Notes:
4941      The dimensions of the sequential matrix in each processor MUST be the same.
4942      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4943      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4944 @*/
4945 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4946 {
4947   PetscMPIInt    size;
4948 
4949   PetscFunctionBegin;
4950   PetscCallMPI(MPI_Comm_size(comm,&size));
4951   if (size == 1) {
4952     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4953     if (scall == MAT_INITIAL_MATRIX) {
4954       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4955     } else {
4956       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4957     }
4958     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4959     PetscFunctionReturn(0);
4960   }
4961   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4962   if (scall == MAT_INITIAL_MATRIX) {
4963     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4964   }
4965   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4966   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4967   PetscFunctionReturn(0);
4968 }
4969 
4970 /*@
4971      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4972           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4973           with MatGetSize()
4974 
4975     Not Collective
4976 
4977    Input Parameters:
4978 +    A - the matrix
4979 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4980 
4981    Output Parameter:
4982 .    A_loc - the local sequential matrix generated
4983 
4984     Level: developer
4985 
4986    Notes:
4987      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4988      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4989      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4990      modify the values of the returned A_loc.
4991 
4992 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4993 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and subclasses (type names share the "mpiaij" prefix) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* one rank: the diagonal block IS the local matrix; reference or copy it */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  /* CSR arrays of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: each row holds its diag + off-diag entries */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* merge each row so the global column indices come out sorted:
       off-diag columns below cstart, then the diag block, then the rest of off-diag */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already built: refill the values in the same interleaved order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5094 
5095 /*@
5096      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5097           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5098 
5099     Not Collective
5100 
5101    Input Parameters:
5102 +    A - the matrix
5103 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 
5105    Output Parameters:
5106 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5107 -    A_loc - the local sequential matrix generated
5108 
5109     Level: developer
5110 
5111    Notes:
5112      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5113 
5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5115 
5116 @*/
5117 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5118 {
5119   Mat            Ao,Ad;
5120   const PetscInt *cmap;
5121   PetscMPIInt    size;
5122   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5123 
5124   PetscFunctionBegin;
5125   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5126   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5127   if (size == 1) {
5128     if (scall == MAT_INITIAL_MATRIX) {
5129       PetscCall(PetscObjectReference((PetscObject)Ad));
5130       *A_loc = Ad;
5131     } else if (scall == MAT_REUSE_MATRIX) {
5132       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5133     }
5134     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5135     PetscFunctionReturn(0);
5136   }
5137   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5138   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5139   if (f) {
5140     PetscCall((*f)(A,scall,glob,A_loc));
5141   } else {
5142     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5143     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5144     Mat_SeqAIJ        *c;
5145     PetscInt          *ai = a->i, *aj = a->j;
5146     PetscInt          *bi = b->i, *bj = b->j;
5147     PetscInt          *ci,*cj;
5148     const PetscScalar *aa,*ba;
5149     PetscScalar       *ca;
5150     PetscInt          i,j,am,dn,on;
5151 
5152     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5153     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5154     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5155     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5156     if (scall == MAT_INITIAL_MATRIX) {
5157       PetscInt k;
5158       PetscCall(PetscMalloc1(1+am,&ci));
5159       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5160       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5161       ci[0] = 0;
5162       for (i=0,k=0; i<am; i++) {
5163         const PetscInt ncols_o = bi[i+1] - bi[i];
5164         const PetscInt ncols_d = ai[i+1] - ai[i];
5165         ci[i+1] = ci[i] + ncols_o + ncols_d;
5166         /* diagonal portion of A */
5167         for (j=0; j<ncols_d; j++,k++) {
5168           cj[k] = *aj++;
5169           ca[k] = *aa++;
5170         }
5171         /* off-diagonal portion of A */
5172         for (j=0; j<ncols_o; j++,k++) {
5173           cj[k] = dn + *bj++;
5174           ca[k] = *ba++;
5175         }
5176       }
5177       /* put together the new matrix */
5178       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5179       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5180       /* Since these are PETSc arrays, change flags to free them as necessary. */
5181       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5182       c->free_a  = PETSC_TRUE;
5183       c->free_ij = PETSC_TRUE;
5184       c->nonew   = 0;
5185       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5186     } else if (scall == MAT_REUSE_MATRIX) {
5187       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5188       for (i=0; i<am; i++) {
5189         const PetscInt ncols_d = ai[i+1] - ai[i];
5190         const PetscInt ncols_o = bi[i+1] - bi[i];
5191         /* diagonal portion of A */
5192         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5193         /* off-diagonal portion of A */
5194         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5195       }
5196       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5197     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5198     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5199     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5200     if (glob) {
5201       PetscInt cst, *gidx;
5202 
5203       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5204       PetscCall(PetscMalloc1(dn+on,&gidx));
5205       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5206       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5207       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5208     }
5209   }
5210   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5211   PetscFunctionReturn(0);
5212 }
5213 
5214 /*@C
5215      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5216 
5217     Not Collective
5218 
5219    Input Parameters:
5220 +    A - the matrix
5221 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5222 -    row, col - index sets of rows and columns to extract (or NULL)
5223 
5224    Output Parameter:
5225 .    A_loc - the local sequential matrix generated
5226 
5227     Level: developer
5228 
5229 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5230 
5231 @*/
5232 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5233 {
5234   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5235   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5236   IS             isrowa,iscola;
5237   Mat            *aloc;
5238   PetscBool      match;
5239 
5240   PetscFunctionBegin;
5241   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5242   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5243   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5244   if (!row) {
5245     start = A->rmap->rstart; end = A->rmap->rend;
5246     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5247   } else {
5248     isrowa = *row;
5249   }
5250   if (!col) {
5251     start = A->cmap->rstart;
5252     cmap  = a->garray;
5253     nzA   = a->A->cmap->n;
5254     nzB   = a->B->cmap->n;
5255     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5256     ncols = 0;
5257     for (i=0; i<nzB; i++) {
5258       if (cmap[i] < start) idx[ncols++] = cmap[i];
5259       else break;
5260     }
5261     imark = i;
5262     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5263     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5264     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5265   } else {
5266     iscola = *col;
5267   }
5268   if (scall != MAT_INITIAL_MATRIX) {
5269     PetscCall(PetscMalloc1(1,&aloc));
5270     aloc[0] = *A_loc;
5271   }
5272   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5273   if (!col) { /* attach global id of condensed columns */
5274     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5275   }
5276   *A_loc = aloc[0];
5277   PetscCall(PetscFree(aloc));
5278   if (!row) {
5279     PetscCall(ISDestroy(&isrowa));
5280   }
5281   if (!col) {
5282     PetscCall(ISDestroy(&iscola));
5283   }
5284   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5285   PetscFunctionReturn(0);
5286 }
5287 
/*
 * Create a sequential AIJ matrix based on row indices: a whole row (all of its columns) is extracted
 * once its index is matched. Rows may be local or remote. The routine is designed to be memory
 * scalable, so that nothing is based on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;  /* leaf->root edges for the diag and off-diag SFs */
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row, record {diag nnz, off-diag nnz} pairs and their running offsets */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Sum the per-leaf counts to size the output matrix and the value SFs */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build per-entry SF graphs: one edge per nonzero, for diag and off-diag parts */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix
   * NOTE: pd->j is shifted IN PLACE for the broadcast and shifted back below;
   * P must not be used concurrently while this routine runs */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global column indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise converted to global indices in place, and converted back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices; every entry must map back (nout == nnz) */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5461 
/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero columns of local A.
 * This supports MPIAIJ and MAIJ matrices.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      /* dof consecutive columns of A collapse onto one row of P (MAIJ support) */
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' maps to the same key as the previous entry (garray sorted) */
        mapping[i] = count-1;
      }
    }
    /* map: off-diagonal column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* Extract the unique keys and sort them: these are the rows of P to fetch */
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5539 
5540 /*@C
5541   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5542 
5543   Collective on Mat
5544 
5545   Input Parameters:
5546 + A - the first matrix in mpiaij format
5547 . B - the second matrix in mpiaij format
5548 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5549 
5550   Output Parameters:
5551 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5552 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5553 - B_seq - the sequential matrix generated
5554 
5555   Level: developer
5556 
5557 @*/
5558 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5559 {
5560   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5561   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5562   IS             isrowb,iscolb;
5563   Mat            *bseq=NULL;
5564 
5565   PetscFunctionBegin;
5566   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5567     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5568   }
5569   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5570 
5571   if (scall == MAT_INITIAL_MATRIX) {
5572     start = A->cmap->rstart;
5573     cmap  = a->garray;
5574     nzA   = a->A->cmap->n;
5575     nzB   = a->B->cmap->n;
5576     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5577     ncols = 0;
5578     for (i=0; i<nzB; i++) {  /* row < local row index */
5579       if (cmap[i] < start) idx[ncols++] = cmap[i];
5580       else break;
5581     }
5582     imark = i;
5583     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5584     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5585     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5586     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5587   } else {
5588     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5589     isrowb  = *rowb; iscolb = *colb;
5590     PetscCall(PetscMalloc1(1,&bseq));
5591     bseq[0] = *B_seq;
5592   }
5593   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5594   *B_seq = bseq[0];
5595   PetscCall(PetscFree(bseq));
5596   if (!rowb) {
5597     PetscCall(ISDestroy(&isrowb));
5598   } else {
5599     *rowb = isrowb;
5600   }
5601   if (!colb) {
5602     PetscCall(ISDestroy(&iscolb));
5603   } else {
5604     *colb = iscolb;
5605   }
5606   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5607   PetscFunctionReturn(0);
5608 }
5609 
5610 /*
5611     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5612     of the OFF-DIAGONAL portion of local A
5613 
5614     Collective on Mat
5615 
5616    Input Parameters:
5617 +    A,B - the matrices in mpiaij format
5618 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5619 
5620    Output Parameter:
5621 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5622 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5623 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5624 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5625 
5626     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5627      for this matrix. This is not desirable.
5628 
5629     Level: developer
5630 
5631 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;        /* ranks we receive from / send to */
  const PetscInt         *srow,*rstarts,*sstarts; /* send row indices; per-message start offsets */
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* A's column layout must align with B's row layout */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* NOTE(review): these assign the local parameter copies only, not the caller's
     * pointers (*startsj_s etc. are untouched), and the function returns without
     * PetscLogEventEnd() after the Begin above — confirm both are intended */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* Reuse the communication pattern of A's Mult scatter to learn which rows of B to fetch */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  rwaits = reqs;
  swaits = reqs + nrecvs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message: row lengths of the requested rows of B */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth from the received row lengths */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j],len,&len)); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* reuse the i/j structure and offset arrays saved by a previous MAT_INITIAL_MATRIX call */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      /* caller does not want to reuse: discard the cached offsets and buffer */
      PetscCall(PetscFree2(sstartsj,rstartsj));
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* save offsets and send buffer for a later MAT_REUSE_MATRIX call */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}
5839 
5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5841 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5843 #if defined(PETSC_HAVE_MKL_SPARSE)
5844 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5845 #endif
5846 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5847 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5848 #if defined(PETSC_HAVE_ELEMENTAL)
5849 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5850 #endif
5851 #if defined(PETSC_HAVE_SCALAPACK)
5852 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5853 #endif
5854 #if defined(PETSC_HAVE_HYPRE)
5855 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5856 #endif
5857 #if defined(PETSC_HAVE_CUDA)
5858 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5859 #endif
5860 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5861 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5862 #endif
5863 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5864 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5865 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5866 
5867 /*
5868     Computes (B'*A')' since computing B*A directly is untenable
5869 
5870                n                       p                          p
5871         [             ]       [             ]         [                 ]
5872       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5873         [             ]       [             ]         [                 ]
5874 
5875 */
5876 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5877 {
5878   Mat            At,Bt,Ct;
5879 
5880   PetscFunctionBegin;
5881   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5882   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5883   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5884   PetscCall(MatDestroy(&At));
5885   PetscCall(MatDestroy(&Bt));
5886   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5887   PetscCall(MatDestroy(&Ct));
5888   PetscFunctionReturn(0);
5889 }
5890 
5891 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5892 {
5893   PetscBool      cisdense;
5894 
5895   PetscFunctionBegin;
5896   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5897   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5898   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5899   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5900   if (!cisdense) {
5901     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5902   }
5903   PetscCall(MatSetUp(C));
5904 
5905   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5906   PetscFunctionReturn(0);
5907 }
5908 
5909 /* ----------------------------------------------------------------*/
5910 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5911 {
5912   Mat_Product *product = C->product;
5913   Mat         A = product->A,B=product->B;
5914 
5915   PetscFunctionBegin;
5916   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5917     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5918 
5919   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5920   C->ops->productsymbolic = MatProductSymbolic_AB;
5921   PetscFunctionReturn(0);
5922 }
5923 
5924 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5925 {
5926   Mat_Product    *product = C->product;
5927 
5928   PetscFunctionBegin;
5929   if (product->type == MATPRODUCT_AB) {
5930     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5931   }
5932   PetscFunctionReturn(0);
5933 }
5934 
5935 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5936    is greater than value, or last if there is no such element.
5937 */
5938 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5939 {
5940   PetscCount  it,step,count = last - first;
5941 
5942   PetscFunctionBegin;
5943   while (count > 0) {
5944     it   = first;
5945     step = count / 2;
5946     it  += step;
5947     if (!(value < array[it])) {
5948       first  = ++it;
5949       count -= step + 1;
5950     } else count = step;
5951   }
5952   *upper = first;
5953   PetscFunctionReturn(0);
5954 }
5955 
5956 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5957 
5958   Input Parameters:
5959 
5960     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5961     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5962 
5963     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5964 
5965     For Set1, j1[] contains column indices of the nonzeros.
5966     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5968     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5969 
5970     Similar for Set2.
5971 
5972     This routine merges the two sets of nonzeros row by row and removes repeats.
5973 
5974   Output Parameters: (memory is allocated by the caller)
5975 
5976     i[],j[]: the CSR of the merged matrix, which has m rows.
5977     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5978     imap2[]: similar to imap1[], but for Set2.
5979     Note we order nonzeros row-by-row and from left to right.
5980 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* Row index of mat */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-pointer merge of the (sorted) row segments of Set1 and Set2;
       b1/b2 always point at the FIRST repeat of the current unique nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer of the merged matrix: t unique nonzeros seen so far */
  }
  PetscFunctionReturn(0);
}
6034 
6035 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6036 
6037   Input Parameters:
6038     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6039     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6040       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6041 
6042       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6043       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6044 
6045   Output Parameters:
6046     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6047     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6048       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6049       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6050 
6051     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6052       Atot: number of entries belonging to the diagonal block.
6053       Annz: number of unique nonzeros belonging to the diagonal block.
6054       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6055         repeats (i.e., same 'i,j' pair).
6056       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6061 
6062     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6063 
6064     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6065 */
6066 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6067   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6068   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6069   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6070 {
6071   PetscInt          cstart,cend,rstart,rend,row,col;
6072   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6073   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6074   PetscCount        k,m,p,q,r,s,mid;
6075   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6076 
6077   PetscFunctionBegin;
6078   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6079   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6080   m    = rend - rstart;
6081 
6082   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6083 
6084   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6085      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6086   */
6087   while (k<n) {
6088     row = i[k];
6089     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6090     for (s=k; s<n; s++) if (i[s] != row) break;
6091     for (p=k; p<s; p++) {
6092       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6093       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6094     }
6095     PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
6096     PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6097     rowBegin[row-rstart] = k;
6098     rowMid[row-rstart]   = mid;
6099     rowEnd[row-rstart]   = s;
6100 
6101     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6102     Atot += mid - k;
6103     Btot += s - mid;
6104 
6105     /* Count unique nonzeros of this diag/offdiag row */
6106     for (p=k; p<mid;) {
6107       col = j[p];
6108       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6109       Annz++;
6110     }
6111 
6112     for (p=mid; p<s;) {
6113       col = j[p];
6114       do {p++;} while (p<s && j[p] == col);
6115       Bnnz++;
6116     }
6117     k = s;
6118   }
6119 
6120   /* Allocation according to Atot, Btot, Annz, Bnnz */
6121   PetscCall(PetscMalloc1(Atot,&Aperm));
6122   PetscCall(PetscMalloc1(Btot,&Bperm));
6123   PetscCall(PetscMalloc1(Annz+1,&Ajmap));
6124   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));
6125 
6126   /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6127   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6128   for (r=0; r<m; r++) {
6129     k     = rowBegin[r];
6130     mid   = rowMid[r];
6131     s     = rowEnd[r];
6132     PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
6133     PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
6134     Atot += mid - k;
6135     Btot += s - mid;
6136 
6137     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6138     for (p=k; p<mid;) {
6139       col = j[p];
6140       q   = p;
6141       do {p++;} while (p<mid && j[p] == col);
6142       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6143       Annz++;
6144     }
6145 
6146     for (p=mid; p<s;) {
6147       col = j[p];
6148       q   = p;
6149       do {p++;} while (p<s && j[p] == col);
6150       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6151       Bnnz++;
6152     }
6153   }
6154   /* Output */
6155   *Aperm_ = Aperm;
6156   *Annz_  = Annz;
6157   *Atot_  = Atot;
6158   *Ajmap_ = Ajmap;
6159   *Bperm_ = Bperm;
6160   *Bnnz_  = Bnnz;
6161   *Btot_  = Btot;
6162   *Bjmap_ = Bjmap;
6163   PetscFunctionReturn(0);
6164 }
6165 
6166 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6167 
6168   Input Parameters:
6169     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6170     nnz:  number of unique nonzeros in the merged matrix
6171     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6173 
6174   Output Parameter: (memory is allocated by the caller)
6175     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6176 
6177   Example:
6178     nnz1 = 4
6179     nnz  = 6
6180     imap = [1,3,4,5]
6181     jmap = [0,3,5,6,7]
6182    then,
6183     jmap_new = [0,0,3,3,5,6,7]
6184 */
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
{
  PetscCount k,p;

  PetscFunctionBegin;
  /* Backward fill: walking imap[] from the end lets each slot of jmap_new[] be written exactly once.
     Every position p with imap[k-1] < p <= imap[k] receives the cumulative count jmap[k+1], so entries
     of the merged matrix that are absent from this set get a zero-length repeat range. */
  jmap_new[0] = 0;
  p = nnz; /* p loops over jmap_new[] backwards */
  for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
  }
  /* Positions at or before imap[0] (or all positions when nnz1 == 0) get jmap[0], which is 0 */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(0);
}
6198 
6199 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6200 {
6201   MPI_Comm                  comm;
6202   PetscMPIInt               rank,size;
6203   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6204   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6205   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6206 
6207   PetscFunctionBegin;
6208   PetscCall(PetscFree(mpiaij->garray));
6209   PetscCall(VecDestroy(&mpiaij->lvec));
6210 #if defined(PETSC_USE_CTABLE)
6211   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6212 #else
6213   PetscCall(PetscFree(mpiaij->colmap));
6214 #endif
6215   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6216   mat->assembled = PETSC_FALSE;
6217   mat->was_assembled = PETSC_FALSE;
6218   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6219 
6220   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6221   PetscCallMPI(MPI_Comm_size(comm,&size));
6222   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6223   PetscCall(PetscLayoutSetUp(mat->rmap));
6224   PetscCall(PetscLayoutSetUp(mat->cmap));
6225   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6226   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6227   PetscCall(MatGetLocalSize(mat,&m,&n));
6228   PetscCall(MatGetSize(mat,&M,&N));
6229 
6230   /* ---------------------------------------------------------------------------*/
6231   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6232   /* entries come first, then local rows, then remote rows.                     */
6233   /* ---------------------------------------------------------------------------*/
6234   PetscCount n1 = coo_n,*perm1;
6235   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6236   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6237   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6238   PetscCall(PetscArraycpy(j1,coo_j,n1));
6239   for (k=0; k<n1; k++) perm1[k] = k;
6240 
6241   /* Manipulate indices so that entries with negative row or col indices will have smallest
6242      row indices, local entries will have greater but negative row indices, and remote entries
6243      will have positive row indices.
6244   */
6245   for (k=0; k<n1; k++) {
6246     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6247     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6248     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6249     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6250   }
6251 
6252   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6253   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6254   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6255   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6256   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6257 
6258   /* ---------------------------------------------------------------------------*/
6259   /*           Split local rows into diag/offdiag portions                      */
6260   /* ---------------------------------------------------------------------------*/
6261   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6262   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6263   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6264 
6265   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6266   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6267   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6268 
6269   /* ---------------------------------------------------------------------------*/
6270   /*           Send remote rows to their owner                                  */
6271   /* ---------------------------------------------------------------------------*/
6272   /* Find which rows should be sent to which remote ranks*/
6273   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6274   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6275   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6276   const PetscInt *ranges;
6277   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6278 
6279   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6280   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6281   for (k=rem; k<n1;) {
6282     PetscMPIInt  owner;
6283     PetscInt     firstRow,lastRow;
6284 
6285     /* Locate a row range */
6286     firstRow = i1[k]; /* first row of this owner */
6287     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6288     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6289 
6290     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6291     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6292 
6293     /* All entries in [k,p) belong to this remote owner */
6294     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6295       PetscMPIInt *sendto2;
6296       PetscInt    *nentries2;
6297       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6298 
6299       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6300       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6301       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6302       PetscCall(PetscFree2(sendto,nentries2));
6303       sendto      = sendto2;
6304       nentries    = nentries2;
6305       maxNsend    = maxNsend2;
6306     }
6307     sendto[nsend]   = owner;
6308     nentries[nsend] = p - k;
6309     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6310     nsend++;
6311     k = p;
6312   }
6313 
6314   /* Build 1st SF to know offsets on remote to send data */
6315   PetscSF     sf1;
6316   PetscInt    nroots = 1,nroots2 = 0;
6317   PetscInt    nleaves = nsend,nleaves2 = 0;
6318   PetscInt    *offsets;
6319   PetscSFNode *iremote;
6320 
6321   PetscCall(PetscSFCreate(comm,&sf1));
6322   PetscCall(PetscMalloc1(nsend,&iremote));
6323   PetscCall(PetscMalloc1(nsend,&offsets));
6324   for (k=0; k<nsend; k++) {
6325     iremote[k].rank  = sendto[k];
6326     iremote[k].index = 0;
6327     nleaves2        += nentries[k];
6328     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6329   }
6330   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6331   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6332   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6333   PetscCall(PetscSFDestroy(&sf1));
6334   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6335 
6336   /* Build 2nd SF to send remote COOs to their owner */
6337   PetscSF sf2;
6338   nroots  = nroots2;
6339   nleaves = nleaves2;
6340   PetscCall(PetscSFCreate(comm,&sf2));
6341   PetscCall(PetscSFSetFromOptions(sf2));
6342   PetscCall(PetscMalloc1(nleaves,&iremote));
6343   p       = 0;
6344   for (k=0; k<nsend; k++) {
6345     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6346     for (q=0; q<nentries[k]; q++,p++) {
6347       iremote[p].rank  = sendto[k];
6348       iremote[p].index = offsets[k] + q;
6349     }
6350   }
6351   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6352 
6353   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6354   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6355 
6356   /* Send the remote COOs to their owner */
6357   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6358   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6359   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6360   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6361   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6362   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6363   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6364 
6365   PetscCall(PetscFree(offsets));
6366   PetscCall(PetscFree2(sendto,nentries));
6367 
6368   /* ---------------------------------------------------------------*/
6369   /* Sort received COOs by row along with the permutation array     */
6370   /* ---------------------------------------------------------------*/
6371   for (k=0; k<n2; k++) perm2[k] = k;
6372   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6373 
6374   /* ---------------------------------------------------------------*/
6375   /* Split received COOs into diag/offdiag portions                 */
6376   /* ---------------------------------------------------------------*/
6377   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6378   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6379   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6380 
6381   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6382   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6383 
6384   /* --------------------------------------------------------------------------*/
6385   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6386   /* --------------------------------------------------------------------------*/
6387   PetscInt   *Ai,*Bi;
6388   PetscInt   *Aj,*Bj;
6389 
6390   PetscCall(PetscMalloc1(m+1,&Ai));
6391   PetscCall(PetscMalloc1(m+1,&Bi));
6392   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6393   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6394 
6395   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6396   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6397   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6398   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6399   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6400 
6401   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6402   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6403 
6404   /* --------------------------------------------------------------------------*/
6405   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6406   /* expect nonzeros in A/B most likely have local contributing entries        */
6407   /* --------------------------------------------------------------------------*/
6408   PetscInt Annz = Ai[m];
6409   PetscInt Bnnz = Bi[m];
6410   PetscCount *Ajmap1_new,*Bjmap1_new;
6411 
6412   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6413   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6414 
6415   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6416   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6417 
6418   PetscCall(PetscFree(Aimap1));
6419   PetscCall(PetscFree(Ajmap1));
6420   PetscCall(PetscFree(Bimap1));
6421   PetscCall(PetscFree(Bjmap1));
6422   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6423   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6424   PetscCall(PetscFree3(i1,j1,perm1));
6425   PetscCall(PetscFree3(i2,j2,perm2));
6426 
6427   Ajmap1 = Ajmap1_new;
6428   Bjmap1 = Bjmap1_new;
6429 
6430   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6431   if (Annz < Annz1 + Annz2) {
6432     PetscInt *Aj_new;
6433     PetscCall(PetscMalloc1(Annz,&Aj_new));
6434     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6435     PetscCall(PetscFree(Aj));
6436     Aj   = Aj_new;
6437   }
6438 
6439   if (Bnnz < Bnnz1 + Bnnz2) {
6440     PetscInt *Bj_new;
6441     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6442     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6443     PetscCall(PetscFree(Bj));
6444     Bj   = Bj_new;
6445   }
6446 
6447   /* --------------------------------------------------------------------------------*/
6448   /* Create new submatrices for on-process and off-process coupling                  */
6449   /* --------------------------------------------------------------------------------*/
6450   PetscScalar   *Aa,*Ba;
6451   MatType       rtype;
6452   Mat_SeqAIJ    *a,*b;
6453   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6454   PetscCall(PetscCalloc1(Bnnz,&Ba));
6455   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6456   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6457   PetscCall(MatDestroy(&mpiaij->A));
6458   PetscCall(MatDestroy(&mpiaij->B));
6459   PetscCall(MatGetRootType_Private(mat,&rtype));
6460   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6461   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6462   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6463 
6464   a = (Mat_SeqAIJ*)mpiaij->A->data;
6465   b = (Mat_SeqAIJ*)mpiaij->B->data;
6466   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6467   a->free_a       = b->free_a       = PETSC_TRUE;
6468   a->free_ij      = b->free_ij      = PETSC_TRUE;
6469 
6470   /* conversion must happen AFTER multiply setup */
6471   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6472   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6473   PetscCall(VecDestroy(&mpiaij->lvec));
6474   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6475   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6476 
6477   mpiaij->coo_n   = coo_n;
6478   mpiaij->coo_sf  = sf2;
6479   mpiaij->sendlen = nleaves;
6480   mpiaij->recvlen = nroots;
6481 
6482   mpiaij->Annz    = Annz;
6483   mpiaij->Bnnz    = Bnnz;
6484 
6485   mpiaij->Annz2   = Annz2;
6486   mpiaij->Bnnz2   = Bnnz2;
6487 
6488   mpiaij->Atot1   = Atot1;
6489   mpiaij->Atot2   = Atot2;
6490   mpiaij->Btot1   = Btot1;
6491   mpiaij->Btot2   = Btot2;
6492 
6493   mpiaij->Ajmap1  = Ajmap1;
6494   mpiaij->Aperm1  = Aperm1;
6495 
6496   mpiaij->Bjmap1  = Bjmap1;
6497   mpiaij->Bperm1  = Bperm1;
6498 
6499   mpiaij->Aimap2  = Aimap2;
6500   mpiaij->Ajmap2  = Ajmap2;
6501   mpiaij->Aperm2  = Aperm2;
6502 
6503   mpiaij->Bimap2  = Bimap2;
6504   mpiaij->Bjmap2  = Bjmap2;
6505   mpiaij->Bperm2  = Bperm2;
6506 
6507   mpiaij->Cperm1  = Cperm1;
6508 
6509   /* Allocate in preallocation. If not used, it has zero cost on host */
6510   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6511   PetscFunctionReturn(0);
6512 }
6513 
6514 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6515 {
6516   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6517   Mat                  A = mpiaij->A,B = mpiaij->B;
6518   PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6519   PetscScalar          *Aa,*Ba;
6520   PetscScalar          *sendbuf = mpiaij->sendbuf;
6521   PetscScalar          *recvbuf = mpiaij->recvbuf;
6522   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6523   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6524   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6525   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6526 
6527   PetscFunctionBegin;
6528   PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6529   PetscCall(MatSeqAIJGetArray(B,&Ba));
6530 
6531   /* Pack entries to be sent to remote */
6532   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6533 
6534   /* Send remote entries to their owner and overlap the communication with local computation */
6535   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6536   /* Add local entries to A and B */
6537   for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6538     PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */
6539     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6540     Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6541   }
6542   for (PetscCount i=0; i<Bnnz; i++) {
6543     PetscScalar sum = 0.0;
6544     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6545     Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6546   }
6547   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6548 
6549   /* Add received remote entries to A and B */
6550   for (PetscCount i=0; i<Annz2; i++) {
6551     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6552   }
6553   for (PetscCount i=0; i<Bnnz2; i++) {
6554     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6555   }
6556   PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6557   PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6558   PetscFunctionReturn(0);
6559 }
6560 
6561 /* ----------------------------------------------------------------*/
6562 
6563 /*MC
6564    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6565 
6566    Options Database Keys:
6567 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6568 
6569    Level: beginner
6570 
6571    Notes:
6572     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6573     in this case the values associated with the rows and columns one passes in are set to zero
6574     in the matrix
6575 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6578 
6579 .seealso: MatCreateAIJ()
6580 M*/
6581 
/*
  MatCreate_MPIAIJ - Constructor for the MATMPIAIJ matrix type.

  Allocates the Mat_MPIAIJ implementation struct, installs the shared
  function table, creates the stash used to buffer entries destined for
  other ranks during MatSetValues(), and registers (by name) the
  type-specific operations and the conversion routines to other matrix
  types via PetscObjectComposeFunction().

  Note: no storage for matrix entries is allocated here; preallocation is
  deferred to MatMPIAIJSetPreallocation()/MatSetUp().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily when needed */
  b->garray      = NULL; /* global column indices of the off-diagonal block; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* type-specific operations, queried by name elsewhere in the library */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to other matrix types; optional backends are guarded by configure-time flags */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  /* matrix-product and COO assembly hooks */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6661 
6662 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6664          and "off-diagonal" part of the matrix in CSR format.
6665 
6666    Collective
6667 
6668    Input Parameters:
6669 +  comm - MPI communicator
6670 .  m - number of local rows (Cannot be PETSC_DECIDE)
6671 .  n - This value should be the same as the local size used in creating the
6672        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6673        calculated if N is given) For square matrices n is almost always m.
6674 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6675 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6676 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6677 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6678 .   a - matrix values
6679 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6680 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6681 -   oa - matrix values
6682 
6683    Output Parameter:
6684 .   mat - the matrix
6685 
6686    Level: advanced
6687 
6688    Notes:
6689        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6690        must free the arrays once the matrix has been destroyed and not before.
6691 
6692        The i and j indices are 0 based
6693 
6694        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6695 
6696        This sets local rows and cannot be used to set off-processor values.
6697 
6698        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6699        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6700        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6701        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6702        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6703        communication if it is known that only local entries will be set.
6704 
6705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6706           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6707 @*/
6708 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6709 {
6710   Mat_MPIAIJ     *maij;
6711 
6712   PetscFunctionBegin;
6713   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6714   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6715   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6716   PetscCall(MatCreate(comm,mat));
6717   PetscCall(MatSetSizes(*mat,m,n,M,N));
6718   PetscCall(MatSetType(*mat,MATMPIAIJ));
6719   maij = (Mat_MPIAIJ*) (*mat)->data;
6720 
6721   (*mat)->preallocated = PETSC_TRUE;
6722 
6723   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6724   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6725 
6726   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6727   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6728 
6729   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6730   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6731   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6732   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6733   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6734   PetscFunctionReturn(0);
6735 }
6736 
/* Context for backend-based (device-capable) MPIAIJ matrix-matrix products
   (AB, AtB, PtAP). Attached to C->product->data and freed by
   MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/receive offsets used when (re)building P_oth */
  PetscScalar *bufa;                 /* communication buffer for P_oth values */
  Mat         P_oth;                 /* rows of P corresponding to A's off-diagonal columns */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the COO buffers (host or device) */

  /* customization (set from the options database in the symbolic phase) */
  PetscBool abmerge;    /* merge product->B's local blocks before multiplying (MATPRODUCT_AB) */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6767 
6768 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6769 {
6770   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6771   PetscInt            i;
6772 
6773   PetscFunctionBegin;
6774   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6775   PetscCall(PetscFree(mmdata->bufa));
6776   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6777   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6778   PetscCall(MatDestroy(&mmdata->P_oth));
6779   PetscCall(MatDestroy(&mmdata->Bloc));
6780   PetscCall(PetscSFDestroy(&mmdata->sf));
6781   for (i = 0; i < mmdata->cp; i++) {
6782     PetscCall(MatDestroy(&mmdata->mp[i]));
6783   }
6784   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6785   PetscCall(PetscFree(mmdata->own[0]));
6786   PetscCall(PetscFree(mmdata->own));
6787   PetscCall(PetscFree(mmdata->off[0]));
6788   PetscCall(PetscFree(mmdata->off));
6789   PetscCall(PetscFree(mmdata));
6790   PetscFunctionReturn(0);
6791 }
6792 
6793 /* Copy selected n entries with indices in idx[] of A to v[].
6794    If idx is NULL, copy the whole data array of A to v[]
6795  */
6796 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6797 {
6798   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6799 
6800   PetscFunctionBegin;
6801   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6802   if (f) {
6803     PetscCall((*f)(A,n,idx,v));
6804   } else {
6805     const PetscScalar *vv;
6806 
6807     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6808     if (n && idx) {
6809       PetscScalar    *w = v;
6810       const PetscInt *oi = idx;
6811       PetscInt       j;
6812 
6813       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6814     } else {
6815       PetscCall(PetscArraycpy(v,vv,n));
6816     }
6817     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6818   }
6819   PetscFunctionReturn(0);
6820 }
6821 
/*
  MatProductNumeric_MPIAIJBACKEND - Numeric phase of the backend-based
  MPIAIJ matrix products.

  Refreshes the temporary matrices (P_oth, Bloc) when needed, re-runs the
  numeric phase of every intermediate sequential product built by the
  symbolic phase, gathers their values into the COO buffers (coo_v for
  on-process entries, coo_w for off-process entries), scatters the
  off-process contributions to their owners, and inserts everything into C
  with MatSetValuesCOO().
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v and coo_w respectively */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* reusesym lets the first numeric call after the symbolic phase skip the
     refresh above; every subsequent call must refresh */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of each intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* gather the intermediate values into the COO buffers; temporary products are skipped */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) { /* this product contributes both off-process and on-process entries */
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else { /* fully on-process: copy the whole nonzero array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values are appended after the local ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6870 
6871 /* Support for Pt * A, A * P, or Pt * A * P */
6872 #define MAX_NUMBER_INTERMEDIATE 4
6873 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6874 {
6875   Mat_Product            *product = C->product;
6876   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6877   Mat_MPIAIJ             *a,*p;
6878   MatMatMPIAIJBACKEND    *mmdata;
6879   ISLocalToGlobalMapping P_oth_l2g = NULL;
6880   IS                     glob = NULL;
6881   const char             *prefix;
6882   char                   pprefix[256];
6883   const PetscInt         *globidx,*P_oth_idx;
6884   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6885   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6886   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6887                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6888                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6889   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6890 
6891   MatProductType         ptype;
6892   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6893   PetscMPIInt            size;
6894 
6895   PetscFunctionBegin;
6896   MatCheckProduct(C,1);
6897   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6898   ptype = product->type;
6899   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6900     ptype = MATPRODUCT_AB;
6901     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6902   }
6903   switch (ptype) {
6904   case MATPRODUCT_AB:
6905     A = product->A;
6906     P = product->B;
6907     m = A->rmap->n;
6908     n = P->cmap->n;
6909     M = A->rmap->N;
6910     N = P->cmap->N;
6911     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6912     break;
6913   case MATPRODUCT_AtB:
6914     P = product->A;
6915     A = product->B;
6916     m = P->cmap->n;
6917     n = A->cmap->n;
6918     M = P->cmap->N;
6919     N = A->cmap->N;
6920     hasoffproc = PETSC_TRUE;
6921     break;
6922   case MATPRODUCT_PtAP:
6923     A = product->A;
6924     P = product->B;
6925     m = P->cmap->n;
6926     n = P->cmap->n;
6927     M = P->cmap->N;
6928     N = P->cmap->N;
6929     hasoffproc = PETSC_TRUE;
6930     break;
6931   default:
6932     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6933   }
6934   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6935   if (size == 1) hasoffproc = PETSC_FALSE;
6936 
6937   /* defaults */
6938   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6939     mp[i]    = NULL;
6940     mptmp[i] = PETSC_FALSE;
6941     rmapt[i] = -1;
6942     cmapt[i] = -1;
6943     rmapa[i] = NULL;
6944     cmapa[i] = NULL;
6945   }
6946 
6947   /* customization */
6948   PetscCall(PetscNew(&mmdata));
6949   mmdata->reusesym = product->api_user;
6950   if (ptype == MATPRODUCT_AB) {
6951     if (product->api_user) {
6952       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6953       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6954       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6955       PetscOptionsEnd();
6956     } else {
6957       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6958       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6959       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6960       PetscOptionsEnd();
6961     }
6962   } else if (ptype == MATPRODUCT_PtAP) {
6963     if (product->api_user) {
6964       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
6965       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6966       PetscOptionsEnd();
6967     } else {
6968       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
6969       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6970       PetscOptionsEnd();
6971     }
6972   }
6973   a = (Mat_MPIAIJ*)A->data;
6974   p = (Mat_MPIAIJ*)P->data;
6975   PetscCall(MatSetSizes(C,m,n,M,N));
6976   PetscCall(PetscLayoutSetUp(C->rmap));
6977   PetscCall(PetscLayoutSetUp(C->cmap));
6978   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6979   PetscCall(MatGetOptionsPrefix(C,&prefix));
6980 
6981   cp   = 0;
6982   switch (ptype) {
6983   case MATPRODUCT_AB: /* A * P */
6984     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6985 
6986     /* A_diag * P_local (merged or not) */
6987     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6988       /* P is product->B */
6989       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6990       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
6991       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6992       PetscCall(MatProductSetFill(mp[cp],product->fill));
6993       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6994       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6995       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6996       mp[cp]->product->api_user = product->api_user;
6997       PetscCall(MatProductSetFromOptions(mp[cp]));
6998       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6999       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7000       PetscCall(ISGetIndices(glob,&globidx));
7001       rmapt[cp] = 1;
7002       cmapt[cp] = 2;
7003       cmapa[cp] = globidx;
7004       mptmp[cp] = PETSC_FALSE;
7005       cp++;
7006     } else { /* A_diag * P_diag and A_diag * P_off */
7007       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7008       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7009       PetscCall(MatProductSetFill(mp[cp],product->fill));
7010       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7011       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7012       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7013       mp[cp]->product->api_user = product->api_user;
7014       PetscCall(MatProductSetFromOptions(mp[cp]));
7015       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7016       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7017       rmapt[cp] = 1;
7018       cmapt[cp] = 1;
7019       mptmp[cp] = PETSC_FALSE;
7020       cp++;
7021       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7022       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7023       PetscCall(MatProductSetFill(mp[cp],product->fill));
7024       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7025       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7026       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7027       mp[cp]->product->api_user = product->api_user;
7028       PetscCall(MatProductSetFromOptions(mp[cp]));
7029       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7030       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7031       rmapt[cp] = 1;
7032       cmapt[cp] = 2;
7033       cmapa[cp] = p->garray;
7034       mptmp[cp] = PETSC_FALSE;
7035       cp++;
7036     }
7037 
7038     /* A_off * P_other */
7039     if (mmdata->P_oth) {
7040       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7041       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7042       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7043       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7044       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7045       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7046       PetscCall(MatProductSetFill(mp[cp],product->fill));
7047       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7048       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7049       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7050       mp[cp]->product->api_user = product->api_user;
7051       PetscCall(MatProductSetFromOptions(mp[cp]));
7052       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7053       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7054       rmapt[cp] = 1;
7055       cmapt[cp] = 2;
7056       cmapa[cp] = P_oth_idx;
7057       mptmp[cp] = PETSC_FALSE;
7058       cp++;
7059     }
7060     break;
7061 
7062   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7063     /* A is product->B */
7064     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7065     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7066       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7067       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7068       PetscCall(MatProductSetFill(mp[cp],product->fill));
7069       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7070       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7071       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7072       mp[cp]->product->api_user = product->api_user;
7073       PetscCall(MatProductSetFromOptions(mp[cp]));
7074       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7075       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7076       PetscCall(ISGetIndices(glob,&globidx));
7077       rmapt[cp] = 2;
7078       rmapa[cp] = globidx;
7079       cmapt[cp] = 2;
7080       cmapa[cp] = globidx;
7081       mptmp[cp] = PETSC_FALSE;
7082       cp++;
7083     } else {
7084       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7085       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7086       PetscCall(MatProductSetFill(mp[cp],product->fill));
7087       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7088       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7089       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7090       mp[cp]->product->api_user = product->api_user;
7091       PetscCall(MatProductSetFromOptions(mp[cp]));
7092       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7093       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7094       PetscCall(ISGetIndices(glob,&globidx));
7095       rmapt[cp] = 1;
7096       cmapt[cp] = 2;
7097       cmapa[cp] = globidx;
7098       mptmp[cp] = PETSC_FALSE;
7099       cp++;
7100       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7101       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7102       PetscCall(MatProductSetFill(mp[cp],product->fill));
7103       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7104       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7105       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7106       mp[cp]->product->api_user = product->api_user;
7107       PetscCall(MatProductSetFromOptions(mp[cp]));
7108       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7109       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7110       rmapt[cp] = 2;
7111       rmapa[cp] = p->garray;
7112       cmapt[cp] = 2;
7113       cmapa[cp] = globidx;
7114       mptmp[cp] = PETSC_FALSE;
7115       cp++;
7116     }
7117     break;
7118   case MATPRODUCT_PtAP:
7119     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7120     /* P is product->B */
7121     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7122     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7123     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7124     PetscCall(MatProductSetFill(mp[cp],product->fill));
7125     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7126     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7127     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7128     mp[cp]->product->api_user = product->api_user;
7129     PetscCall(MatProductSetFromOptions(mp[cp]));
7130     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7131     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7132     PetscCall(ISGetIndices(glob,&globidx));
7133     rmapt[cp] = 2;
7134     rmapa[cp] = globidx;
7135     cmapt[cp] = 2;
7136     cmapa[cp] = globidx;
7137     mptmp[cp] = PETSC_FALSE;
7138     cp++;
7139     if (mmdata->P_oth) {
7140       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7141       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7142       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7143       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7144       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7145       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7146       PetscCall(MatProductSetFill(mp[cp],product->fill));
7147       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7148       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7149       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7150       mp[cp]->product->api_user = product->api_user;
7151       PetscCall(MatProductSetFromOptions(mp[cp]));
7152       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7153       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7154       mptmp[cp] = PETSC_TRUE;
7155       cp++;
7156       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7157       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7158       PetscCall(MatProductSetFill(mp[cp],product->fill));
7159       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7160       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7161       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7162       mp[cp]->product->api_user = product->api_user;
7163       PetscCall(MatProductSetFromOptions(mp[cp]));
7164       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7165       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7166       rmapt[cp] = 2;
7167       rmapa[cp] = globidx;
7168       cmapt[cp] = 2;
7169       cmapa[cp] = P_oth_idx;
7170       mptmp[cp] = PETSC_FALSE;
7171       cp++;
7172     }
7173     break;
7174   default:
7175     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7176   }
7177   /* sanity check */
7178   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7179 
7180   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7181   for (i = 0; i < cp; i++) {
7182     mmdata->mp[i]    = mp[i];
7183     mmdata->mptmp[i] = mptmp[i];
7184   }
7185   mmdata->cp = cp;
7186   C->product->data       = mmdata;
7187   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7188   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7189 
7190   /* memory type */
7191   mmdata->mtype = PETSC_MEMTYPE_HOST;
7192   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7193   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7194   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7195   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7196 
7197   /* prepare coo coordinates for values insertion */
7198 
7199   /* count total nonzeros of those intermediate seqaij Mats
7200     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7201     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7202     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7203   */
7204   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7205     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7206     if (mptmp[cp]) continue;
7207     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7208       const PetscInt *rmap = rmapa[cp];
7209       const PetscInt mr = mp[cp]->rmap->n;
7210       const PetscInt rs = C->rmap->rstart;
7211       const PetscInt re = C->rmap->rend;
7212       const PetscInt *ii  = mm->i;
7213       for (i = 0; i < mr; i++) {
7214         const PetscInt gr = rmap[i];
7215         const PetscInt nz = ii[i+1] - ii[i];
7216         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7217         else ncoo_oown += nz; /* this row is local */
7218       }
7219     } else ncoo_d += mm->nz;
7220   }
7221 
7222   /*
7223     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7224 
7225     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7226 
7227     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7228 
7229     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7230     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7231     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7232 
7233     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7234     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7235   */
7236   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7237   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7238 
7239   /* gather (i,j) of nonzeros inserted by remote procs */
7240   if (hasoffproc) {
7241     PetscSF  msf;
7242     PetscInt ncoo2,*coo_i2,*coo_j2;
7243 
7244     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7245     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7246     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7247 
7248     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7249       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7250       PetscInt   *idxoff = mmdata->off[cp];
7251       PetscInt   *idxown = mmdata->own[cp];
7252       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7253         const PetscInt *rmap = rmapa[cp];
7254         const PetscInt *cmap = cmapa[cp];
7255         const PetscInt *ii  = mm->i;
7256         PetscInt       *coi = coo_i + ncoo_o;
7257         PetscInt       *coj = coo_j + ncoo_o;
7258         const PetscInt mr = mp[cp]->rmap->n;
7259         const PetscInt rs = C->rmap->rstart;
7260         const PetscInt re = C->rmap->rend;
7261         const PetscInt cs = C->cmap->rstart;
7262         for (i = 0; i < mr; i++) {
7263           const PetscInt *jj = mm->j + ii[i];
7264           const PetscInt gr  = rmap[i];
7265           const PetscInt nz  = ii[i+1] - ii[i];
7266           if (gr < rs || gr >= re) { /* this is an offproc row */
7267             for (j = ii[i]; j < ii[i+1]; j++) {
7268               *coi++ = gr;
7269               *idxoff++ = j;
7270             }
7271             if (!cmapt[cp]) { /* already global */
7272               for (j = 0; j < nz; j++) *coj++ = jj[j];
7273             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7274               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7275             } else { /* offdiag */
7276               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7277             }
7278             ncoo_o += nz;
7279           } else { /* this is a local row */
7280             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7281           }
7282         }
7283       }
7284       mmdata->off[cp + 1] = idxoff;
7285       mmdata->own[cp + 1] = idxown;
7286     }
7287 
7288     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7289     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7290     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7291     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7292     ncoo = ncoo_d + ncoo_oown + ncoo2;
7293     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7294     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7295     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7296     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7297     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7298     PetscCall(PetscFree2(coo_i,coo_j));
7299     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7300     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7301     coo_i = coo_i2;
7302     coo_j = coo_j2;
7303   } else { /* no offproc values insertion */
7304     ncoo = ncoo_d;
7305     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7306 
7307     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7308     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7309     PetscCall(PetscSFSetUp(mmdata->sf));
7310   }
7311   mmdata->hasoffproc = hasoffproc;
7312 
7313   /* gather (i,j) of nonzeros inserted locally */
7314   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7315     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7316     PetscInt       *coi = coo_i + ncoo_d;
7317     PetscInt       *coj = coo_j + ncoo_d;
7318     const PetscInt *jj  = mm->j;
7319     const PetscInt *ii  = mm->i;
7320     const PetscInt *cmap = cmapa[cp];
7321     const PetscInt *rmap = rmapa[cp];
7322     const PetscInt mr = mp[cp]->rmap->n;
7323     const PetscInt rs = C->rmap->rstart;
7324     const PetscInt re = C->rmap->rend;
7325     const PetscInt cs = C->cmap->rstart;
7326 
7327     if (mptmp[cp]) continue;
7328     if (rmapt[cp] == 1) { /* consecutive rows */
7329       /* fill coo_i */
7330       for (i = 0; i < mr; i++) {
7331         const PetscInt gr = i + rs;
7332         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7333       }
7334       /* fill coo_j */
7335       if (!cmapt[cp]) { /* type-0, already global */
7336         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7337       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7338         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7339       } else { /* type-2, local to global for sparse columns */
7340         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7341       }
7342       ncoo_d += mm->nz;
7343     } else if (rmapt[cp] == 2) { /* sparse rows */
7344       for (i = 0; i < mr; i++) {
7345         const PetscInt *jj = mm->j + ii[i];
7346         const PetscInt gr  = rmap[i];
7347         const PetscInt nz  = ii[i+1] - ii[i];
7348         if (gr >= rs && gr < re) { /* local rows */
7349           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7350           if (!cmapt[cp]) { /* type-0, already global */
7351             for (j = 0; j < nz; j++) *coj++ = jj[j];
7352           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7353             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7354           } else { /* type-2, local to global for sparse columns */
7355             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7356           }
7357           ncoo_d += nz;
7358         }
7359       }
7360     }
7361   }
7362   if (glob) {
7363     PetscCall(ISRestoreIndices(glob,&globidx));
7364   }
7365   PetscCall(ISDestroy(&glob));
7366   if (P_oth_l2g) {
7367     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7368   }
7369   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7370   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7371   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7372 
7373   /* preallocate with COO data */
7374   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7375   PetscCall(PetscFree2(coo_i,coo_j));
7376   PetscFunctionReturn(0);
7377 }
7378 
7379 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7380 {
7381   Mat_Product *product = mat->product;
7382 #if defined(PETSC_HAVE_DEVICE)
7383   PetscBool    match   = PETSC_FALSE;
7384   PetscBool    usecpu  = PETSC_FALSE;
7385 #else
7386   PetscBool    match   = PETSC_TRUE;
7387 #endif
7388 
7389   PetscFunctionBegin;
7390   MatCheckProduct(mat,1);
7391 #if defined(PETSC_HAVE_DEVICE)
7392   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7393     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7394   }
7395   if (match) { /* we can always fallback to the CPU if requested */
7396     switch (product->type) {
7397     case MATPRODUCT_AB:
7398       if (product->api_user) {
7399         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7400         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7401         PetscOptionsEnd();
7402       } else {
7403         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7404         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7405         PetscOptionsEnd();
7406       }
7407       break;
7408     case MATPRODUCT_AtB:
7409       if (product->api_user) {
7410         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7411         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7412         PetscOptionsEnd();
7413       } else {
7414         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7415         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7416         PetscOptionsEnd();
7417       }
7418       break;
7419     case MATPRODUCT_PtAP:
7420       if (product->api_user) {
7421         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7422         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7423         PetscOptionsEnd();
7424       } else {
7425         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7426         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7427         PetscOptionsEnd();
7428       }
7429       break;
7430     default:
7431       break;
7432     }
7433     match = (PetscBool)!usecpu;
7434   }
7435 #endif
7436   if (match) {
7437     switch (product->type) {
7438     case MATPRODUCT_AB:
7439     case MATPRODUCT_AtB:
7440     case MATPRODUCT_PtAP:
7441       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7442       break;
7443     default:
7444       break;
7445     }
7446   }
7447   /* fallback to MPIAIJ ops */
7448   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7449   PetscFunctionReturn(0);
7450 }
7451 
7452 /*
7453     Special version for direct calls from Fortran
7454 */
7455 #include <petsc/private/fortranimpl.h>
7456 
/* Change these macros so they can be used inside the void-returning Fortran stub below,
   which reports errors through its *_ierr output argument instead of a return value */
/* Identical to PetscCallVoid, except it assigns the error code to *_ierr before returning */
#undef  PetscCall
#define PetscCall(...) do {                                                                    \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                                                  \
    }                                                                                          \
  } while (0)
7467 
/* Identical to the library SETERRQ, except it stores the error code in *_ierr and
   returns (void) rather than returning a PetscErrorCode */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                                            \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                                                    \
  } while (0)
7473 
/* Map the C symbol below to the name produced by the Fortran compiler's mangling scheme
   (all caps, no trailing underscore, or - the default - a trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All arguments arrive by reference (Fortran calling convention) and are dereferenced into
  locals up front. Errors are reported through the *_ierr output argument by the PetscCall()
  and SETERRQ() macros redefined above, since this routine returns void.

  Entries whose row is owned by this process are inserted directly into the diagonal
  (aij->A) or off-diagonal (aij->B) sequential blocks via the MatSetValues_SeqAIJ_A_Private()
  and MatSetValues_SeqAIJ_B_Private() macros; entries for rows owned by other processes are
  accumulated in the matrix stash (unless donotstash is set) for exchange at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat          mat  = *mmat;
  PetscInt     m    = *mm, n = *mn;
  InsertMode   addv = *maddv;
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;

  MatCheckPreallocated(mat,1);
  /* record the first insert mode used; mixing ADD_VALUES and INSERT_VALUES is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Variables with these exact names are consumed by the MatSetValues_SeqAIJ_{A,B}_Private()
       macros below; do not rename or reorder them */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* per-row search state (also required, with these names, by the private macros) */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* pick the value according to the row/column orientation of v[] */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the locally owned range: goes into the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently skipped */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* off-diagonal column: translate the global column index to a local index in B */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              /* colmap stores indices shifted by one so that 0 can mean "not present" */
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not yet present in B and new nonzeros are allowed: disassemble so the
                 entry can be inserted with its global column index */
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private(),
                   since MatDisAssemble_MPIAIJ() replaced aij->B */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B still uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefine these macros here because they were redefined above from their original definitions. No
 * other PETSc functions should be defined past this point, since it is impossible to recover the
 * original definitions */
7596 #undef PetscCall
7597 #undef SETERRQ
7598