xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 70faa4e68e85355a5b9d00c7669f5865fa0fdf3e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50 
51   PetscFunctionBegin;
52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53   A->boundtocpu = flg;
54 #endif
55   if (a->A) {
56     PetscCall(MatBindToCPU(a->A,flg));
57   }
58   if (a->B) {
59     PetscCall(MatBindToCPU(a->B,flg));
60   }
61 
62   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
63    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
64    * to differ from the parent matrix. */
65   if (a->lvec) {
66     PetscCall(VecBindToCPU(a->lvec,flg));
67   }
68   if (a->diag) {
69     PetscCall(VecBindToCPU(a->diag,flg));
70   }
71 
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
76 {
77   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
78 
79   PetscFunctionBegin;
80   if (mat->A) {
81     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
82     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
83   }
84   PetscFunctionReturn(0);
85 }
86 
/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set of the global indices of the
  locally owned rows that contain at least one stored nonzero value.

  Collective.  On output *keptrows is NULL when no process has a zero row;
  otherwise it is an IS (owning its index array) of the kept rows.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  /* row pointers of the diagonal (A) and off-diagonal (B) blocks */
  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: cnt = number of local rows with no entries or only explicit zeros */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero found in A: row is kept */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* nonzero found in B: row is kept */
    }
    cnt++; /* row holds only explicit zeros */
ok1:;
  }
  /* n0rows = global number of zero rows; when there are none leave *keptrows NULL */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global indices of the kept (nonzero) rows */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
155 
156 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
157 {
158   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
159   PetscBool         cong;
160 
161   PetscFunctionBegin;
162   PetscCall(MatHasCongruentLayouts(Y,&cong));
163   if (Y->assembled && cong) {
164     PetscCall(MatDiagonalSet(aij->A,D,is));
165   } else {
166     PetscCall(MatDiagonalSet_Default(Y,D,is));
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172 {
173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174   PetscInt       i,rstart,nrows,*rows;
175 
176   PetscFunctionBegin;
177   *zrows = NULL;
178   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
179   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
180   for (i=0; i<nrows; i++) rows[i] += rstart;
181   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182   PetscFunctionReturn(0);
183 }
184 
/*
  MatGetColumnReductions_MPIAIJ - Computes a reduction (1/2/infinity norm, or
  sum/mean of the real or imaginary parts) of every column of the matrix.

  Collective; reductions must hold N (global number of columns) PetscReal
  entries and receives the same result on every process.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray[k] = global column of B's local column k */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work)); /* one zero-initialized slot per global column */
  /* NOTE(review): these get/restore pairs presumably ensure the host-side value
     arrays (a_aij->a, b_aij->a) read directly below are current -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate squared magnitudes; the square root is taken after the global sum */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    /* per-column maximum of |a_ij| */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(0);
}
250 
251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252 {
253   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254   IS              sis,gis;
255   const PetscInt  *isis,*igis;
256   PetscInt        n,*iis,nsis,ngis,rstart,i;
257 
258   PetscFunctionBegin;
259   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
260   PetscCall(MatFindNonzeroRows(a->B,&gis));
261   PetscCall(ISGetSize(gis,&ngis));
262   PetscCall(ISGetSize(sis,&nsis));
263   PetscCall(ISGetIndices(sis,&isis));
264   PetscCall(ISGetIndices(gis,&igis));
265 
266   PetscCall(PetscMalloc1(ngis+nsis,&iis));
267   PetscCall(PetscArraycpy(iis,igis,ngis));
268   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
269   n    = ngis + nsis;
270   PetscCall(PetscSortRemoveDupsInt(&n,iis));
271   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
272   for (i=0; i<n; i++) iis[i] += rstart;
273   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274 
275   PetscCall(ISRestoreIndices(sis,&isis));
276   PetscCall(ISRestoreIndices(gis,&igis));
277   PetscCall(ISDestroy(&sis));
278   PetscCall(ISDestroy(&gis));
279   PetscFunctionReturn(0);
280 }
281 
282 /*
283   Local utility routine that creates a mapping from the global column
284 number to the local number in the off-diagonal part of the local
285 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but it is fast to access).
288 */
/* Builds aij->colmap, mapping each global column number that occurs in the
   off-diagonal block B to (local column in B)+1; the +1 offset lets 0 / "not
   found" mean the global column is absent from B. */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i; /* number of (compressed) columns of B */

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant (scalable): keys and values are both stored shifted by one */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense variant: one slot per global column, zeroed so 0 means "absent" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
308 
/*
  MatSetValues_SeqAIJ_A_Private - inserts or adds a single value at (row,col)
  of the diagonal block A.  Performs a binary search (narrowed to a short
  linear scan) over the current row; if the entry exists it is updated,
  otherwise a new nonzero is created (reallocating via
  MatSeqXAIJReallocateAIJ when permitted by nonew).  Relies on the local
  variables (rp1, ap1, low1, high1, ...) set up in MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure if LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
345 
/*
  MatSetValues_SeqAIJ_B_Private - twin of MatSetValues_SeqAIJ_A_Private for the
  off-diagonal block B: inserts or adds one value at (row,col), updating an
  existing entry when found or creating a new nonzero (reallocating via
  MatSeqXAIJReallocateAIJ when permitted by nonew).  Relies on the local
  variables (rp2, ap2, low2, high2, ...) set up in MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
381 
/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one locally owned row,
  where v lists the row's entries in global column order.  The row is split in
  three pieces: off-diagonal entries left of the diagonal block, the diagonal
  block, and off-diagonal entries right of it.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL)); /* diag = first locally owned row (== first diag column for square A) */
  row  = row - diag; /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break; /* l = #entries of B's row left of the diagonal block */
  }
  /* the first l values of v fill the left part of B's row */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
419 
420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
421 {
422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423   PetscScalar    value = 0.0;
424   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426   PetscBool      roworiented = aij->roworiented;
427 
428   /* Some Variables required in the macro */
429   Mat        A                    = aij->A;
430   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
431   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432   PetscBool  ignorezeroentries    = a->ignorezeroentries;
433   Mat        B                    = aij->B;
434   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436   MatScalar  *aa,*ba;
437   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
438   PetscInt   nonew;
439   MatScalar  *ap1,*ap2;
440 
441   PetscFunctionBegin;
442   PetscCall(MatSeqAIJGetArray(A,&aa));
443   PetscCall(MatSeqAIJGetArray(B,&ba));
444   for (i=0; i<m; i++) {
445     if (im[i] < 0) continue;
446     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
447     if (im[i] >= rstart && im[i] < rend) {
448       row      = im[i] - rstart;
449       lastcol1 = -1;
450       rp1      = aj + ai[row];
451       ap1      = aa + ai[row];
452       rmax1    = aimax[row];
453       nrow1    = ailen[row];
454       low1     = 0;
455       high1    = nrow1;
456       lastcol2 = -1;
457       rp2      = bj + bi[row];
458       ap2      = ba + bi[row];
459       rmax2    = bimax[row];
460       nrow2    = bilen[row];
461       low2     = 0;
462       high2    = nrow2;
463 
464       for (j=0; j<n; j++) {
465         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467         if (in[j] >= cstart && in[j] < cend) {
468           col   = in[j] - cstart;
469           nonew = a->nonew;
470           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471         } else if (in[j] < 0) continue;
472         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
477             }
478 #if defined(PETSC_USE_CTABLE)
479             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480             col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
485               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
486               col  =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B        = aij->B;
489               b        = (Mat_SeqAIJ*)B->data;
490               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba       = b->a;
499             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
500               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
501                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
502               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
503             }
504           } else col = in[j];
505           nonew = b->nonew;
506           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
507         }
508       }
509     } else {
510       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515         } else {
516           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
517         }
518       }
519     }
520   }
521   PetscCall(MatSeqAIJRestoreArray(A,&aa));
522   PetscCall(MatSeqAIJRestoreArray(B,&ba));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
528     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
530 */
531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532 {
533   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534   Mat            A           = aij->A; /* diagonal part of the matrix */
535   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539   PetscInt       *ailen      = a->ilen,*aj = a->j;
540   PetscInt       *bilen      = b->ilen,*bj = b->j;
541   PetscInt       am          = aij->A->rmap->n,j;
542   PetscInt       diag_so_far = 0,dnz;
543   PetscInt       offd_so_far = 0,onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j=0; j<am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
574 {
575   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
576   Mat            A      = aij->A; /* diagonal part of the matrix */
577   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
579   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
580   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
581   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
582   PetscInt       *ailen = a->ilen,*aj = a->j;
583   PetscInt       *bilen = b->ilen,*bj = b->j;
584   PetscInt       am     = aij->A->rmap->n,j;
585   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587   PetscScalar    *aa = a->a,*ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j=0; j<am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd = full_offd_i[j];
594     rowstart_diag = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag+dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd+onz_row] = mat_j[col];
604         ba[rowstart_offd+onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/*
  MatGetValues_MPIAIJ - Retrieves an m-by-n dense block of values into v.  All
  requested rows must be locally owned (off-process gets are not supported);
  entries not stored in the matrix are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block A */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal column: translate to B's local numbering via colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 or a garray mismatch means the column is not present in B */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
652 
653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
662   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
663   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664   PetscFunctionReturn(0);
665 }
666 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: receives and applies stashed
  off-process entries, assembles the diagonal (A) and off-diagonal (B) blocks,
  keeps the (dis)assembled state globally consistent, and updates the
  collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash; the loop below assumes values for the same row arrive
       consecutively within each message */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the communication structures for MatMult */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached row data and the cached diagonal are invalid after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
747 
748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
749 {
750   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
751 
752   PetscFunctionBegin;
753   PetscCall(MatZeroEntries(l->A));
754   PetscCall(MatZeroEntries(l->B));
755   PetscFunctionReturn(0);
756 }
757 
/* Zero the (globally numbered) rows[] of A, optionally placing diag on the diagonal
   and fixing up b so that the solution x is preserved in the zeroed rows.
   rows[] may contain rows owned by other ranks; they are mapped to local rows first. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* b[row] = diag*x[row] keeps x a solution of the modified system; requires row == col layout */
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot block nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: diagonal entry lives in the local diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save nonew flags; temporarily allow new nonzeros when the pattern need not be kept */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal value via MatSetValues since it may fall in either block */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists past the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew policy */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++; /* bump global state only if some rank changed its pattern */
  PetscFunctionReturn(0);
}
831 
/* Zero both the rows and columns listed in rows[] (global numbering, may be off-rank),
   optionally placing diag on the diagonal and adjusting b for the known values x.
   Rows are communicated to their owners with a PetscSF; the zeroed columns of the
   off-diagonal block are found via a 0/1 mask vector scattered like a matvec. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;     /* local row count; NOTE(review): also reused below as a per-row nnz count */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; rows[] entries are >= 0 so MPI_LOR marks owned targets
     (cast drops const: rows is used read-only as the leaf buffer) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  /* build a 0/1 mask of zeroed rows, then scatter it so each rank learns which of its
     ghost columns were zeroed elsewhere */
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring in the ghost values of x needed to update b for eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only nonempty rows are stored; ridx maps back to true row index */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed on its owner: move a*x to the rhs, then drop the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscInt       nt;
955   VecScatter     Mvctx = a->Mvctx;
956 
957   PetscFunctionBegin;
958   PetscCall(VecGetLocalSize(xx,&nt));
959   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
960   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
961   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
962   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
963   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970 
971   PetscFunctionBegin;
972   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   VecScatter     Mvctx = a->Mvctx;
980 
981   PetscFunctionBegin;
982   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
983   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
984   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
985   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   /* do nondiagonal part */
995   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996   /* do local part */
997   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
998   /* add partial results together */
999   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1000   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1001   PetscFunctionReturn(0);
1002 }
1003 
/* Test whether Bmat equals Amat^T to within tol. First a cheap global check of the
   diagonal blocks; only if that passes are the off-diagonal parts gathered via
   MatCreateSubMatrices and compared. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* every rank must agree before the expensive test is attempted */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0); /* uniprocessor: no off-diagonal part exists */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range
     NOTE(review): allocation is sized with N but the second loop runs to M;
     this assumes M == N (square), which transpose equality requires -- confirm */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me): these are the two off-diagonal strips */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscFunctionBegin;
1048   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059   /* do local part */
1060   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1061   /* add partial results together */
1062   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1063   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   PetscCall(MatGetDiagonal(a->A,v));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085 
1086   PetscFunctionBegin;
1087   PetscCall(MatScale(a->A,aa));
1088   PetscCall(MatScale(a->B,aa));
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1093 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1094 {
1095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1096 
1097   PetscFunctionBegin;
1098   PetscCall(PetscSFDestroy(&aij->coo_sf));
1099   PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
1100   PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
1101   PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
1102   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1103   PetscCall(PetscFree(aij->Cperm1));
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112 #if defined(PETSC_USE_LOG)
1113   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1114 #endif
1115   PetscCall(MatStashDestroy_Private(&mat->stash));
1116   PetscCall(VecDestroy(&aij->diag));
1117   PetscCall(MatDestroy(&aij->A));
1118   PetscCall(MatDestroy(&aij->B));
1119 #if defined(PETSC_USE_CTABLE)
1120   PetscCall(PetscTableDestroy(&aij->colmap));
1121 #else
1122   PetscCall(PetscFree(aij->colmap));
1123 #endif
1124   PetscCall(PetscFree(aij->garray));
1125   PetscCall(VecDestroy(&aij->lvec));
1126   PetscCall(VecScatterDestroy(&aij->Mvctx));
1127   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1128   PetscCall(PetscFree(aij->ld));
1129 
1130   /* Free COO */
1131   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1132 
1133   PetscCall(PetscFree(mat->data));
1134 
1135   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1136   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1137 
1138   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1139   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1140   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1141   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1142   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1143   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1144   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1145   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1146   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1147   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1148 #if defined(PETSC_HAVE_CUDA)
1149   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1150 #endif
1151 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1153 #endif
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1157 #endif
1158 #if defined(PETSC_HAVE_SCALAPACK)
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1160 #endif
1161 #if defined(PETSC_HAVE_HYPRE)
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1164 #endif
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1171 #if defined(PETSC_HAVE_MKL_SPARSE)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1173 #endif
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1183 {
1184   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1185   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1186   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1187   const PetscInt    *garray = aij->garray;
1188   const PetscScalar *aa,*ba;
1189   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1190   PetscInt          *rowlens;
1191   PetscInt          *colidxs;
1192   PetscScalar       *matvals;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(PetscViewerSetUp(viewer));
1196 
1197   M  = mat->rmap->N;
1198   N  = mat->cmap->N;
1199   m  = mat->rmap->n;
1200   rs = mat->rmap->rstart;
1201   cs = mat->cmap->rstart;
1202   nz = A->nz + B->nz;
1203 
1204   /* write matrix header */
1205   header[0] = MAT_FILE_CLASSID;
1206   header[1] = M; header[2] = N; header[3] = nz;
1207   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1208   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1209 
1210   /* fill in and store row lengths  */
1211   PetscCall(PetscMalloc1(m,&rowlens));
1212   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1213   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1214   PetscCall(PetscFree(rowlens));
1215 
1216   /* fill in and store column indices */
1217   PetscCall(PetscMalloc1(nz,&colidxs));
1218   for (cnt=0, i=0; i<m; i++) {
1219     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1220       if (garray[B->j[jb]] > cs) break;
1221       colidxs[cnt++] = garray[B->j[jb]];
1222     }
1223     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1224       colidxs[cnt++] = A->j[ja] + cs;
1225     for (; jb<B->i[i+1]; jb++)
1226       colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1235   PetscCall(PetscMalloc1(nz,&matvals));
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       matvals[cnt++] = aa[ja];
1243     for (; jb<B->i[i+1]; jb++)
1244       matvals[cnt++] = ba[jb];
1245   }
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1247   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1248   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1249   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1250   PetscCall(PetscFree(matvals));
1251 
1252   /* write block size option to the viewer's .info file */
1253   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #include <petscdraw.h>
/* View the parallel matrix on an ASCII, draw, binary, or socket viewer.
   Special ASCII formats (load balance, info, info-detail) are handled and
   return early; everything else falls through to gathering the whole matrix
   onto rank 0 and viewing it there with the sequential viewer. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary: local sizes, nz counts, inode usage, scatter info */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable -- iascii was already handled
       by the first branch above; left as-is to preserve behavior */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1387 {
1388   PetscBool      iascii,isdraw,issocket,isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1395   if (iascii || isdraw || isbinary || issocket) {
1396     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
/*
   MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (and the Eisenstat trick) are supported: each outer
   iteration applies the sequential SOR kernel of the diagonal block mat->A to a
   right-hand side corrected for the off-process coupling, bb1 = bb - B*x, where B is
   the off-diagonal block and x the ghosted current iterate.  A true parallel SOR in
   the global ordering is not implemented and raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* work vector holding the corrected rhs bb - B*x */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate directly to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* ~flag & SOR_ZERO_INITIAL_GUESS tests that the zero-initial-guess bit is NOT set;
     in all those cases (and for Eisenstat) the work vector bb1 is needed */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x == 0 the first sweep needs no ghost update */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    /* backward local sweep from a zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the global diagonal; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx */
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected by the local solves */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1499 
/*
   MatPermute_MPIAIJ - form B = P*A*Q^T for row permutation rowp and column permutation colp.

   Strategy: invert the row and column permutations with star forests (PetscSF) to learn
   where each locally-owned row/column lands, broadcast the column destinations for the
   ghost columns, count the permuted nonzeros per destination row (split into diagonal /
   off-diagonal parts) for preallocation, then insert the permuted values with
   MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* NOTE(review): never assigned in this function; the guarded ISDestroy below looks vestigial — confirm */
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each (permuted) row for preallocation */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1605 
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscLogDouble isend[5],irecv[5];
1621 
1622   PetscFunctionBegin;
1623   info->block_size = 1.0;
1624   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1625 
1626   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1627   isend[3] = info->memory;  isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1630 
1631   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1632   isend[3] += info->memory;  isend[4] += info->mallocs;
1633   if (flag == MAT_LOCAL) {
1634     info->nz_used      = isend[0];
1635     info->nz_allocated = isend[1];
1636     info->nz_unneeded  = isend[2];
1637     info->memory       = isend[3];
1638     info->mallocs      = isend[4];
1639   } else if (flag == MAT_GLOBAL_MAX) {
1640     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1641 
1642     info->nz_used      = irecv[0];
1643     info->nz_allocated = irecv[1];
1644     info->nz_unneeded  = irecv[2];
1645     info->memory       = irecv[3];
1646     info->mallocs      = irecv[4];
1647   } else if (flag == MAT_GLOBAL_SUM) {
1648     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   }
1656   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1657   info->fill_ratio_needed = 0;
1658   info->factor_mallocs    = 0;
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1663 {
1664   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1665 
1666   PetscFunctionBegin;
1667   switch (op) {
1668   case MAT_NEW_NONZERO_LOCATIONS:
1669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1670   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1671   case MAT_KEEP_NONZERO_PATTERN:
1672   case MAT_NEW_NONZERO_LOCATION_ERR:
1673   case MAT_USE_INODES:
1674   case MAT_IGNORE_ZERO_ENTRIES:
1675   case MAT_FORM_EXPLICIT_TRANSPOSE:
1676     MatCheckPreallocated(A,1);
1677     PetscCall(MatSetOption(a->A,op,flg));
1678     PetscCall(MatSetOption(a->B,op,flg));
1679     break;
1680   case MAT_ROW_ORIENTED:
1681     MatCheckPreallocated(A,1);
1682     a->roworiented = flg;
1683 
1684     PetscCall(MatSetOption(a->A,op,flg));
1685     PetscCall(MatSetOption(a->B,op,flg));
1686     break;
1687   case MAT_FORCE_DIAGONAL_ENTRIES:
1688   case MAT_SORTED_FULL:
1689     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1690     break;
1691   case MAT_IGNORE_OFF_PROC_ENTRIES:
1692     a->donotstash = flg;
1693     break;
1694   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1695   case MAT_SPD:
1696   case MAT_SYMMETRIC:
1697   case MAT_STRUCTURALLY_SYMMETRIC:
1698   case MAT_HERMITIAN:
1699   case MAT_SYMMETRY_ETERNAL:
1700     break;
1701   case MAT_SUBMAT_SINGLEIS:
1702     A->submat_singleis = flg;
1703     break;
1704   case MAT_STRUCTURE_ONLY:
1705     /* The option is handled directly by MatSetOption() */
1706     break;
1707   default:
1708     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
/*
   MatGetRow_MPIAIJ - return one locally-owned global row, with column indices in global
   numbering and entries sorted by increasing column.

   The row is assembled by merging the diagonal-block (A) and off-diagonal-block (B)
   pieces: B entries whose global column lies before the diagonal block come first, then
   all A entries (shifted by cstart), then the remaining B entries.  Results live in the
   per-matrix scratch arrays mat->rowvalues / mat->rowindices, so only one row may be
   outstanding at a time (guarded by mat->getrowactive; release with MatRestoreRow).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request only the pieces the caller asked for from the sequential getrow kernels */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;   /* global column numbers of the compressed B columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries that precede the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already known from the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1789 
1790 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1791 {
1792   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1796   aij->getrowactive = PETSC_FALSE;
1797   PetscFunctionReturn(0);
1798 }
1799 
/*
   MatNorm_MPIAIJ - Frobenius, one (max column sum) and infinity (max row sum) norms.
   The two-norm is not supported.  Local contributions from the diagonal (A) and
   off-diagonal (B) blocks are combined with an MPI reduction; on a single process the
   call is forwarded to the sequential block directly.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then reduce and take the square root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (length cmap->N work arrays), then reduce */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps compressed B columns back to global numbering */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row-sum max plus one reduce suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1869 
/*
   MatTranspose_MPIAIJ - transpose an MPIAIJ matrix.

   The diagonal block is transposed locally (all its transposed entries stay on this
   process); the off-diagonal block is scattered via MatSetValues() because its
   transposed entries land on other processes.  For MAT_INITIAL_MATRIX (and for the
   in-place case, which arrives here with *matout == A) a fresh matrix with exact
   preallocation is built first; otherwise the existing *matout is reused.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose swaps the row/column layouts and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed B column indices to global numbering once, up front */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* row i of B becomes column (rstart+i) of the transpose: insert it as one column */
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: fold the new matrix into A's header */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1957 
/*
   MatDiagonalScale_MPIAIJ - compute mat = diag(ll) * mat * diag(rr).

   The scatter of rr into the ghosted vector is started first so that the communication
   overlaps with the left scaling of B and the scaling of the diagonal block A; only
   then is the scatter completed and the off-diagonal block right-scaled.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling is row-wise, hence purely local for both blocks */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}
1987 
1988 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1989 {
1990   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1991 
1992   PetscFunctionBegin;
1993   PetscCall(MatSetUnfactored(a->A));
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1998 {
1999   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2000   Mat            a,b,c,d;
2001   PetscBool      flg;
2002 
2003   PetscFunctionBegin;
2004   a = matA->A; b = matA->B;
2005   c = matB->A; d = matB->B;
2006 
2007   PetscCall(MatEqual(a,c,&flg));
2008   if (flg) {
2009     PetscCall(MatEqual(b,d,&flg));
2010   }
2011   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2016 {
2017   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2018   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2019 
2020   PetscFunctionBegin;
2021   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2022   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2023     /* because of the column compression in the off-processor part of the matrix a->B,
2024        the number of columns in a->B and b->B may be different, hence we cannot call
2025        the MatCopy() directly on the two parts. If need be, we can provide a more
2026        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2027        then copying the submatrices */
2028     PetscCall(MatCopy_Basic(A,B,str));
2029   } else {
2030     PetscCall(MatCopy(a->A,b->A,str));
2031     PetscCall(MatCopy(a->B,b->B,str));
2032   }
2033   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2034   PetscFunctionReturn(0);
2035 }
2036 
/* Default setup: preallocate with PETSC_DEFAULT (heuristic) per-row nonzero counts */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2043 
2044 /*
2045    Computes the number of nonzeros per row needed for preallocation when X and Y
2046    have different nonzero structure.
2047 */
2048 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2049 {
2050   PetscInt       i,j,k,nzx,nzy;
2051 
2052   PetscFunctionBegin;
2053   /* Set the number of nonzeros in the new matrix */
2054   for (i=0; i<m; i++) {
2055     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2056     nzx = xi[i+1] - xi[i];
2057     nzy = yi[i+1] - yi[i];
2058     nnz[i] = 0;
2059     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2060       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2061       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2062       nnz[i]++;
2063     }
2064     for (; k<nzy; k++) nnz[i]++;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2070 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2071 {
2072   PetscInt       m = Y->rmap->N;
2073   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2074   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2075 
2076   PetscFunctionBegin;
2077   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2078   PetscFunctionReturn(0);
2079 }
2080 
/*
   MatAXPY_MPIAIJ - compute Y = a*X + Y.

   SAME_NONZERO_PATTERN operates block-wise on the diagonal and off-diagonal parts;
   SUBSET_NONZERO_PATTERN falls back to the generic implementation; otherwise a new
   matrix with the union sparsity pattern is preallocated, filled, and merged back
   into Y's header (so Y's PetscObject identity is preserved for callers).
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* yy->A / yy->B are sequential blocks, so rmap->N is their local row count */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    /* count the union pattern per row: diagonal parts share a numbering, off-diagonal
       parts are compared through their garray local-to-global maps */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2111 
2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2113 
2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2115 {
2116   PetscFunctionBegin;
2117   if (PetscDefined(USE_COMPLEX)) {
2118     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2119 
2120     PetscCall(MatConjugate_SeqAIJ(aij->A));
2121     PetscCall(MatConjugate_SeqAIJ(aij->B));
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2127 {
2128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2129 
2130   PetscFunctionBegin;
2131   PetscCall(MatRealPart(a->A));
2132   PetscCall(MatRealPart(a->B));
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2137 {
2138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2139 
2140   PetscFunctionBegin;
2141   PetscCall(MatImaginaryPart(a->A));
2142   PetscCall(MatImaginaryPart(a->B));
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2147 {
2148   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2149   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2150   PetscScalar       *va,*vv;
2151   Vec               vB,vA;
2152   const PetscScalar *vb;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2156   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2157 
2158   PetscCall(VecGetArrayWrite(vA,&va));
2159   if (idx) {
2160     for (i=0; i<m; i++) {
2161       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2162     }
2163   }
2164 
2165   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2166   PetscCall(PetscMalloc1(m,&idxb));
2167   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2168 
2169   PetscCall(VecGetArrayWrite(v,&vv));
2170   PetscCall(VecGetArrayRead(vB,&vb));
2171   for (i=0; i<m; i++) {
2172     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2173       vv[i] = vb[i];
2174       if (idx) idx[i] = a->garray[idxb[i]];
2175     } else {
2176       vv[i] = va[i];
2177       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2178         idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA,&vv));
2182   PetscCall(VecRestoreArrayWrite(vA,&va));
2183   PetscCall(VecRestoreArrayRead(vB,&vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(0);
2188 }
2189 
/* Computes, for each locally owned row i, v[i] = the entry of smallest absolute value,
   where implicit zeros in the off-diagonal (B) part count as 0.0; if idx is non-NULL,
   idx[i] receives the global column of that entry.  The result is combined from the
   diagonal block mat->A and the compressed off-diagonal block mat->B. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;        /* local row/column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global column ownership range */
  PetscInt          *cmap  = mat->garray;                 /* B's compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* This process owns no columns: every local entry of each row is an implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;          /* running pointer over B's values, advanced row by row below */
  bi   = b->i;         /* B's CSR row offsets */
  bj   = b->j;         /* B's CSR (compressed) column indices, advanced with ba */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, so the row min in absolute value is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): below, 'j' (a position within the compressed row) is compared with
         cstart and used as a global column number — subtle; confirm on rows whose
         off-diagonal entries are scattered before modifying this search */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the explicit entries of this B row, keeping the smallest |value| seen so far */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2297 
/* Computes, for each locally owned row i, v[i] = the minimum entry (compared by real part),
   where implicit zeros in the off-diagonal (B) part count as 0.0; if idx is non-NULL,
   idx[i] receives the global column of the minimizing entry. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;        /* local row/column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global column ownership range */
  PetscInt          *cmap  = mat->garray;                 /* B's compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* This process owns no columns: report the identity of min (PETSC_MAX_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;          /* running pointer over B's values, advanced row by row below */
  bi   = b->i;         /* B's CSR row offsets */
  bj   = b->j;         /* B's CSR (compressed) column indices, advanced with ba */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, so the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): below, 'j' (a position within the compressed row) is compared with
         cstart and used as a global column number — subtle; confirm on rows whose
         off-diagonal entries are scattered before modifying this search */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the explicit entries of this B row, keeping the smallest (by real part) seen so far */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2405 
/* Computes, for each locally owned row i, v[i] = the maximum entry (compared by real part),
   where implicit zeros in the off-diagonal (B) part count as 0.0; if idx is non-NULL,
   idx[i] receives the global column of the maximizing entry. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;        /* local row/column counts */
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend; /* global column ownership range */
  PetscInt          *cmap  = mat->garray;                 /* B's compressed column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block: delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* This process owns no columns: report the identity of max (PETSC_MIN_REAL) and no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;          /* running pointer over B's values, advanced row by row below */
  bi   = b->i;         /* B's CSR row offsets */
  bj   = b->j;         /* B's CSR (compressed) column indices, advanced with ba */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): below, 'j' (a position within the compressed row) is compared with
         cstart and used as a global column number — subtle; confirm on rows whose
         off-diagonal entries are scattered before modifying this search */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the explicit entries of this B row, keeping the largest (by real part) seen so far */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   Mat            *dummy;
2517 
2518   PetscFunctionBegin;
2519   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2520   *newmat = *dummy;
2521   PetscCall(PetscFree(dummy));
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2526 {
2527   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2528 
2529   PetscFunctionBegin;
2530   PetscCall(MatInvertBlockDiagonal(a->A,values));
2531   A->factorerrortype = a->A->factorerrortype;
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2536 {
2537   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2538 
2539   PetscFunctionBegin;
2540   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2541   PetscCall(MatSetRandom(aij->A,rctx));
2542   if (x->assembled) {
2543     PetscCall(MatSetRandom(aij->B,rctx));
2544   } else {
2545     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2546   }
2547   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2548   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2553 {
2554   PetscFunctionBegin;
2555   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2556   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 /*@
2561    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2562 
2563    Collective on Mat
2564 
2565    Input Parameters:
2566 +    A - the matrix
2567 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2568 
   Level: advanced
2570 
2571 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if the Mat provides one; silently a no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscBool            sc = PETSC_FALSE,flg;
2582 
2583   PetscFunctionBegin;
2584   PetscCall(PetscOptionsHead(PetscOptionsObject,"MPIAIJ options"));
2585   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2586   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2587   if (flg) {
2588     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2589   }
2590   PetscCall(PetscOptionsTail());
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2595 {
2596   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2597   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2598 
2599   PetscFunctionBegin;
2600   if (!Y->preallocated) {
2601     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2602   } else if (!aij->nz) {
2603     PetscInt nonew = aij->nonew;
2604     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2605     aij->nonew = nonew;
2606   }
2607   PetscCall(MatShift_Basic(Y,a));
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2612 {
2613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2614 
2615   PetscFunctionBegin;
2616   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2617   PetscCall(MatMissingDiagonal(a->A,missing,d));
2618   if (d) {
2619     PetscInt rstart;
2620     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2621     *d += rstart;
2622 
2623   }
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2628 {
2629   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2630 
2631   PetscFunctionBegin;
2632   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ: slot numbers (in comments) correspond to the
   MatOperation enumeration; a NULL entry means the operation is not implemented here. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2786 
2787 /* ----------------------------------------------------------------------------------------*/
2788 
2789 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2790 {
2791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2792 
2793   PetscFunctionBegin;
2794   PetscCall(MatStoreValues(aij->A));
2795   PetscCall(MatStoreValues(aij->B));
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2800 {
2801   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2802 
2803   PetscFunctionBegin;
2804   PetscCall(MatRetrieveValues(aij->A));
2805   PetscCall(MatRetrieveValues(aij->B));
2806   PetscFunctionReturn(0);
2807 }
2808 
/* Preallocates the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) sequential
   parts of the parallel matrix, (re)creating the off-diagonal part and discarding
   any previously built communication structures. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the global-to-local column map and scatter built by a previous assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on one process there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal part keeps its sizes, so it is only created on first preallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2852 
/* Resets the matrix to its freshly preallocated state: the nonzero pattern capacity of
   both local parts is kept, but assembled data structures (column map, ghost vector,
   scatter) are discarded and the matrix is marked unassembled. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the global-to-local column map and scatter built by a previous assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2879 
/* Creates a new MPIAIJ matrix with the same layout as matin, deep-copying the local
   parts, column maps and communication structures; cpvalues controls whether the
   numerical values are copied along with the nonzero pattern. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  /* copy the scalar state flags from the original */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;          /* per-call MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the compressed-column-to-global map of the off-diagonal part */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2945 
2946 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2947 {
2948   PetscBool      isbinary, ishdf5;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2952   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2953   /* force binary viewer to load .info file if it has not yet done so */
2954   PetscCall(PetscViewerSetUp(viewer));
2955   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2956   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2957   if (isbinary) {
2958     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2959   } else if (ishdf5) {
2960 #if defined(PETSC_HAVE_HDF5)
2961     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2962 #else
2963     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2964 #endif
2965   } else {
2966     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2967   }
2968   PetscFunctionReturn(0);
2969 }
2970 
/* Read an MPIAIJ matrix from a PETSc binary viewer.

   The header (classid, M, N, nz) is read on every process; row lengths,
   column indices and values are then distributed with collective reads
   (PetscViewerBinaryReadAll) according to the matrix's row layout, and the
   matrix is assembled via MatMPIAIJSetPreallocationCSR().  The order of the
   collective calls below is part of the file-format contract and must not
   be changed. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nonzero count flags a special on-disk format this loader cannot handle */
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths in place to obtain local CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* consistency check: the local row lengths must sum to the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values; the CSR data is copied, so free it afterwards */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3017 
3018 /* Not scalable because of ISAllGather() unless getting all columns. */
3019 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3020 {
3021   IS             iscol_local;
3022   PetscBool      isstride;
3023   PetscMPIInt    lisstride=0,gisstride;
3024 
3025   PetscFunctionBegin;
3026   /* check if we are grabbing all columns*/
3027   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3028 
3029   if (isstride) {
3030     PetscInt  start,len,mstart,mlen;
3031     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3032     PetscCall(ISGetLocalSize(iscol,&len));
3033     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3034     if (mstart == start && mlen-mstart == len) lisstride = 1;
3035   }
3036 
3037   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3038   if (gisstride) {
3039     PetscInt N;
3040     PetscCall(MatGetSize(mat,NULL,&N));
3041     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3042     PetscCall(ISSetIdentity(iscol_local));
3043     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3044   } else {
3045     PetscInt cbs;
3046     PetscCall(ISGetBlockSize(iscol,&cbs));
3047     PetscCall(ISAllGather(iscol,&iscol_local));
3048     PetscCall(ISSetBlockSize(iscol_local,cbs));
3049   }
3050 
3051   *isseq = iscol_local;
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /*
3056  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3057  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3058 
3059  Input Parameters:
3060    mat - matrix
   isrow - parallel row index set; its local indices are a subset of the local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3063    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3064            i.e., mat->cstart <= iscol[i] < mat->cend
3065  Output Parameter:
3066    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3067    iscol_o - sequential column index set for retrieving mat->B
3068    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3069  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;           /* x marks selected columns; cmap carries their submatrix column indices */
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;           /* off-diagonal part of mat */
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;   /* mat's existing off-process column scatter, reused here */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: isstart = global offset of this process's first iscol entry */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  /* mark each selected local column in x and record its submatrix column index in cmap */
  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d: local column indices for mat->A; idx ownership passes to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: row indices shifted to the local numbering of mat->A */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries of lvec that stayed at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership of cmap1 (only first 'count' entries meaningful) */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3166 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat.
   For MAT_INITIAL_MATRIX the index sets used (isrow_d, iscol_d, iscol_o) are composed onto the
   result so a later MAT_REUSE_MATRIX call can retrieve them with PetscObjectQuery(). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; Asub and Bsub ownership passes to M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-walk the two sorted global-column maps to keep only surviving columns */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       PetscObjectCompose() takes its own reference, so drop ours right after */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3261 
/* Top-level submatrix extraction for MPIAIJ: dispatches to a specialized fast path
   when isrow (and possibly iscol) follow mat's own processor distribution, and
   falls back to the nonscalable gather-based implementation otherwise. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which path built *newmat is recorded via the composed objects queried below */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* MPI_LAND: the fast paths apply only if every process agrees */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* compose iscol_local so the MAT_REUSE_MATRIX path above can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3365 
3366 /*@C
3367      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3368          and "off-diagonal" part of the matrix in CSR format.
3369 
3370    Collective
3371 
3372    Input Parameters:
3373 +  comm - MPI communicator
3374 .  A - "diagonal" portion of matrix
3375 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3376 -  garray - global index of B columns
3377 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3381 
3382    Notes:
3383        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3384        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3385 
3386 .seealso: MatCreateMPIAIJWithSplitArrays()
3387 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col; /* oi/oj: CSR arrays of B, reused by Bnew below */
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* A and Bnew are installed directly, so skip MatMPIAIJSetPreallocation() */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global ones, in place, via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays (no copy) */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* transfer array ownership: B must not free the shared arrays when destroyed ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and Bnew frees them instead when it is eventually destroyed */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3454 
3455 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3456 
/* Extract a parallel submatrix when isrow has the same processor distribution as mat.
   A sequential submatrix Msub is built locally, then redistributed into a parallel
   matrix; Msub, iscol_sub and iscmap are composed onto *newmat so a later
   MAT_REUSE_MATRIX call (with iscol_local == NULL) can retrieve them.
   Requires iscol_local to be sorted (it may contain duplicate indices). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the objects saved by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0; /* merge-walk position in garray (both sequences are sorted) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      /* idx and cmap1 ownership passes to the index sets */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns evenly; the first Ncols%size processes get one extra */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat, mapping Msub's column indices through cmap */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request;
     PetscObjectCompose() takes its own reference, so drop ours right after */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}
3665 
3666 /*
3667     Not great since it makes two copies of the submatrix, first an SeqAIJ
3668   in local and then by concatenating the local matrices the end result.
3669   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3670 
3671   Note: This requires a sequential iscol with all indices.
3672 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;          /* M: result; Mreuse: per-process sequential submatrix */
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns.
     The LAND reduction makes allcolumns collective: it is only taken when
     every rank passed the identity IS covering all global columns. */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  /* Extract (or re-extract, for reuse) the requested rows/columns as a local SeqAIJ matrix */
  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential submatrix was stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    /* peek directly at the SeqAIJ CSR arrays of the local submatrix */
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread n columns as evenly as possible; first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    /* one allocation: dlens is the first m entries, olens the next m */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns inside [rstart,rend) land in the diagonal block, the rest off-diagonal */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));  /* frees olens too (same allocation) */
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert each local row; cwork/vwork walk the CSR column/value arrays in place */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the start of the array by the loop above;
     MatSeqAIJRestoreArrayRead() appears to tolerate a moved pointer here -- confirm */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));  /* compose took a reference; drop ours */
  }
  PetscFunctionReturn(0);
}
3797 
3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3799 {
3800   PetscInt       m,cstart, cend,j,nnz,i,d;
3801   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3802   const PetscInt *JJ;
3803   PetscBool      nooffprocentries;
3804 
3805   PetscFunctionBegin;
3806   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3807 
3808   PetscCall(PetscLayoutSetUp(B->rmap));
3809   PetscCall(PetscLayoutSetUp(B->cmap));
3810   m      = B->rmap->n;
3811   cstart = B->cmap->rstart;
3812   cend   = B->cmap->rend;
3813   rstart = B->rmap->rstart;
3814 
3815   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3816 
3817   if (PetscDefined(USE_DEBUG)) {
3818     for (i=0; i<m; i++) {
3819       nnz = Ii[i+1]- Ii[i];
3820       JJ  = J + Ii[i];
3821       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3822       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3823       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3824     }
3825   }
3826 
3827   for (i=0; i<m; i++) {
3828     nnz     = Ii[i+1]- Ii[i];
3829     JJ      = J + Ii[i];
3830     nnz_max = PetscMax(nnz_max,nnz);
3831     d       = 0;
3832     for (j=0; j<nnz; j++) {
3833       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3834     }
3835     d_nnz[i] = d;
3836     o_nnz[i] = nnz - d;
3837   }
3838   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3839   PetscCall(PetscFree2(d_nnz,o_nnz));
3840 
3841   for (i=0; i<m; i++) {
3842     ii   = i + rstart;
3843     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3844   }
3845   nooffprocentries    = B->nooffprocentries;
3846   B->nooffprocentries = PETSC_TRUE;
3847   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3848   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3849   B->nooffprocentries = nooffprocentries;
3850 
3851   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3875 
3876        The format which is used for the sparse matrix input, is equivalent to a
3877     row-major ordering.. i.e for the following matrix, the input data expected is
3878     as shown
3879 
3880 $        1 0 0
3881 $        2 0 3     P0
3882 $       -------
3883 $        4 5 6     P1
3884 $
3885 $     Process0 [P0]: rows_owned=[0,1]
3886 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3887 $        j =  {0,0,2}  [size = 3]
3888 $        v =  {1,2,3}  [size = 3]
3889 $
3890 $     Process1 [P1]: rows_owned=[2]
3891 $        i =  {0,3}    [size = nrow+1  = 1+1]
3892 $        j =  {0,1,2}  [size = 3]
3893 $        v =  {4,5,6}  [size = 3]
3894 
3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3896           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3897 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on B (e.g.
     MatMPIAIJSetPreallocationCSR_MPIAIJ); a no-op if B's type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3904 
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective
3913 
3914    Input Parameters:
3915 +  B - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See Users-Manual: ch_mat for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
3944    as the submatrix which is obtained by extraction the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    the this processor, and c1-c2 is range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining portion of the local
3951    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3952 
3953    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3957    You can also run with the option -info and look for messages with the string
3958    malloc in them to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values, that is
3963    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3964    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4006    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : dnz = 2, o_nz = 2
4010      proc1 : dnz = 3, o_nz = 2
4011      proc2 : dnz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is sum of all the above values i.e 34, and
4027    hence pre-allocation is perfect.
4028 
4029    Level: intermediate
4030 
4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4032           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4033 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate B before dispatch: must be a Mat whose type has already been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation composed on B; silently a
     no-op for matrix types that do not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4042 
4043 /*@
4044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4045          CSR format for the local rows.
4046 
4047    Collective
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (Cannot be PETSC_DECIDE)
4052 .  n - This value should be the same as the local size used in creating the
4053        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4054        calculated if N is given) For square matrices n is almost always m.
4055 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4056 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4057 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4058 .   j - column indices
4059 -   a - matrix values
4060 
4061    Output Parameter:
4062 .   mat - the matrix
4063 
4064    Level: intermediate
4065 
4066    Notes:
4067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4070 
4071        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4072 
4073        The format which is used for the sparse matrix input, is equivalent to a
4074     row-major ordering.. i.e for the following matrix, the input data expected is
4075     as shown
4076 
4077        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4078 
4079 $        1 0 0
4080 $        2 0 3     P0
4081 $       -------
4082 $        4 5 6     P1
4083 $
4084 $     Process0 [P0]: rows_owned=[0,1]
4085 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4086 $        j =  {0,0,2}  [size = 3]
4087 $        v =  {1,2,3}  [size = 3]
4088 $
4089 $     Process1 [P1]: rows_owned=[2]
4090 $        i =  {0,3}    [size = nrow+1  = 1+1]
4091 $        j =  {0,1,2}  [size = 3]
4092 $        v =  {4,5,6}  [size = 3]
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4096 @*/
4097 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4098 {
4099   PetscFunctionBegin;
4100   PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4101   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4102   PetscCall(MatCreate(comm,mat));
4103   PetscCall(MatSetSizes(*mat,m,n,M,N));
4104   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4105   PetscCall(MatSetType(*mat,MATMPIAIJ));
4106   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4107   PetscFunctionReturn(0);
4108 }
4109 
4110 /*@
4111      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4112          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4113 
4114    Collective
4115 
4116    Input Parameters:
4117 +  mat - the matrix
4118 .  m - number of local rows (Cannot be PETSC_DECIDE)
4119 .  n - This value should be the same as the local size used in creating the
4120        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4121        calculated if N is given) For square matrices n is almost always m.
4122 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4123 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4124 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4125 .  J - column indices
4126 -  v - matrix values
4127 
4128    Level: intermediate
4129 
4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4131           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4132 @*/
4133 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4134 {
4135   PetscInt       cstart,nnz,i,j;
4136   PetscInt       *ld;
4137   PetscBool      nooffprocentries;
4138   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4139   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4140   PetscScalar    *ad,*ao;
4141   const PetscInt *Adi = Ad->i;
4142   PetscInt       ldi,Iii,md;
4143 
4144   PetscFunctionBegin;
4145   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4146   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4147   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4148   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4149 
4150   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4151   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4152   cstart = mat->cmap->rstart;
4153   if (!Aij->ld) {
4154     /* count number of entries below block diagonal */
4155     PetscCall(PetscCalloc1(m,&ld));
4156     Aij->ld = ld;
4157     for (i=0; i<m; i++) {
4158       nnz  = Ii[i+1]- Ii[i];
4159       j     = 0;
4160       while  (J[j] < cstart && j < nnz) {j++;}
4161       J    += nnz;
4162       ld[i] = j;
4163     }
4164   } else {
4165     ld = Aij->ld;
4166   }
4167 
4168   for (i=0; i<m; i++) {
4169     nnz  = Ii[i+1]- Ii[i];
4170     Iii  = Ii[i];
4171     ldi  = ld[i];
4172     md   = Adi[i+1]-Adi[i];
4173     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4174     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4175     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4176     ad  += md;
4177     ao  += nnz - md;
4178   }
4179   nooffprocentries      = mat->nooffprocentries;
4180   mat->nooffprocentries = PETSC_TRUE;
4181   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4182   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4183   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4184   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4185   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4186   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4187   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4188   mat->nooffprocentries = nooffprocentries;
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 /*@C
4193    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4194    (the default parallel PETSc format).  For good matrix assembly performance
4195    the user should preallocate the matrix storage by setting the parameters
4196    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4197    performance can be increased by more than a factor of 50.
4198 
4199    Collective
4200 
4201    Input Parameters:
4202 +  comm - MPI communicator
4203 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4204            This value should be the same as the local size used in creating the
4205            y vector for the matrix-vector product y = Ax.
4206 .  n - This value should be the same as the local size used in creating the
4207        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4208        calculated if N is given) For square matrices n is almost always m.
4209 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4210 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4211 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4212            (same value is used for all local rows)
4213 .  d_nnz - array containing the number of nonzeros in the various rows of the
4214            DIAGONAL portion of the local submatrix (possibly different for each row)
4215            or NULL, if d_nz is used to specify the nonzero structure.
4216            The size of this array is equal to the number of local rows, i.e 'm'.
4217 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4218            submatrix (same value is used for all local rows).
4219 -  o_nnz - array containing the number of nonzeros in the various rows of the
4220            OFF-DIAGONAL portion of the local submatrix (possibly different for
4221            each row) or NULL, if o_nz is used to specify the nonzero
4222            structure. The size of this array is equal to the number
4223            of local rows, i.e 'm'.
4224 
4225    Output Parameter:
4226 .  A - the matrix
4227 
4228    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4229    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4230    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4231 
4232    Notes:
4233    If the *_nnz parameter is given then the *_nz parameter is ignored
4234 
4235    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4236    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4237    storage requirements for this matrix.
4238 
4239    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4241    that argument.
4242 
4243    The user MUST specify either the local or global matrix dimensions
4244    (possibly both).
4245 
4246    The parallel matrix is partitioned across processors such that the
4247    first m0 rows belong to process 0, the next m1 rows belong to
4248    process 1, the next m2 rows belong to process 2 etc.. where
4249    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4250    values corresponding to [m x N] submatrix.
4251 
4252    The columns are logically partitioned with the n0 columns belonging
4253    to 0th partition, the next n1 columns belonging to the next
4254    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4255 
4256    The DIAGONAL portion of the local submatrix on any given processor
4257    is the submatrix corresponding to the rows and columns m,n
4258    corresponding to the given processor. i.e diagonal matrix on
4259    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4260    etc. The remaining portion of the local submatrix [m x (N-n)]
4261    constitute the OFF-DIAGONAL portion. The example below better
4262    illustrates this concept.
4263 
4264    For a square global matrix we define each processor's diagonal portion
4265    to be its local rows and the corresponding columns (a square submatrix);
4266    each processor's off-diagonal portion encompasses the remainder of the
4267    local matrix (a rectangular submatrix).
4268 
4269    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4270 
4271    When calling this routine with a single process communicator, a matrix of
4272    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4273    type of communicator, use the construction mechanism
4274 .vb
4275      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4276 .ve
4277 
4278 $     MatCreate(...,&A);
4279 $     MatSetType(A,MATMPIAIJ);
4280 $     MatSetSizes(A, m,n,M,N);
4281 $     MatMPIAIJSetPreallocation(A,...);
4282 
4283    By default, this format uses inodes (identical nodes) when possible.
4284    We search for consecutive rows with the same nonzero structure, thereby
4285    reusing matrix information to achieve increased efficiency.
4286 
4287    Options Database Keys:
4288 +  -mat_no_inode  - Do not use inodes
4289 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4290 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4291         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4292         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4293 
4294    Example usage:
4295 
4296    Consider the following 8x8 matrix with 34 non-zero values, that is
4297    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4298    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4299    as follows
4300 
4301 .vb
4302             1  2  0  |  0  3  0  |  0  4
4303     Proc0   0  5  6  |  7  0  0  |  8  0
4304             9  0 10  | 11  0  0  | 12  0
4305     -------------------------------------
4306            13  0 14  | 15 16 17  |  0  0
4307     Proc1   0 18  0  | 19 20 21  |  0  0
4308             0  0  0  | 22 23  0  | 24  0
4309     -------------------------------------
4310     Proc2  25 26 27  |  0  0 28  | 29  0
4311            30  0  0  | 31 32 33  |  0 34
4312 .ve
4313 
4314    This can be represented as a collection of submatrices as
4315 
4316 .vb
4317       A B C
4318       D E F
4319       G H I
4320 .ve
4321 
4322    Where the submatrices A,B,C are owned by proc0, D,E,F are
4323    owned by proc1, G,H,I are owned by proc2.
4324 
4325    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4326    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4327    The 'M','N' parameters are 8,8, and have the same values on all procs.
4328 
4329    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4330    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4331    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4332    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4335 
4336    When d_nz, o_nz parameters are specified, d_nz storage elements are
4337    allocated for every row of the local diagonal submatrix, and o_nz
4338    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4341    In this case, the values of d_nz,o_nz are
4342 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4346 .ve
4347    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4348    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4350    34 values.
4351 
4352    When d_nnz, o_nnz parameters are specified, the storage is specified
4353    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4354    In the above case the values for d_nnz,o_nnz are
4355 .vb
4356      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4357      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4358      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4359 .ve
4360    Here the space allocated is sum of all the above values i.e 34, and
4361    hence pre-allocation is perfect.
4362 
4363    Level: intermediate
4364 
4365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4366           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4367 @*/
4368 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4369 {
4370   PetscMPIInt    size;
4371 
4372   PetscFunctionBegin;
4373   PetscCall(MatCreate(comm,A));
4374   PetscCall(MatSetSizes(*A,m,n,M,N));
4375   PetscCallMPI(MPI_Comm_size(comm,&size));
4376   if (size > 1) {
4377     PetscCall(MatSetType(*A,MATMPIAIJ));
4378     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4379   } else {
4380     PetscCall(MatSetType(*A,MATSEQAIJ));
4381     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4382   }
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 /*@C
4387   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4388 
4389   Not collective
4390 
4391   Input Parameter:
4392 . A - The MPIAIJ matrix
4393 
4394   Output Parameters:
4395 + Ad - The local diagonal block as a SeqAIJ matrix
4396 . Ao - The local off-diagonal block as a SeqAIJ matrix
4397 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4398 
4399   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4401   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4402   local column numbers to global column numbers in the original matrix.
4403 
4404   Level: intermediate
4405 
4406 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4407 @*/
4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4409 {
4410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4411   PetscBool      flg;
4412 
4413   PetscFunctionBegin;
4414   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4415   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4416   if (Ad)     *Ad     = a->A;
4417   if (Ao)     *Ao     = a->B;
4418   if (colmap) *colmap = a->garray;
4419   PetscFunctionReturn(0);
4420 }
4421 
/* Build a parallel (or, on one process, sequential) AIJ matrix on comm by
   stacking each process's sequential matrix inmat row-wise; n is the number
   of local columns (or PETSC_DECIDE).  With scall == MAT_INITIAL_MATRIX the
   sparsity pattern is analyzed and *outmat created; with MAT_REUSE_MATRIX
   only the numerical values are re-inserted into the existing *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first global row owned by this process = sum of local row counts of lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per local row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocation calls are made; only the one matching the actual
       type of *outmat takes effect (the other is a no-op for that type) */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4476 
4477 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4478 {
4479   PetscMPIInt       rank;
4480   PetscInt          m,N,i,rstart,nnz;
4481   size_t            len;
4482   const PetscInt    *indx;
4483   PetscViewer       out;
4484   char              *name;
4485   Mat               B;
4486   const PetscScalar *values;
4487 
4488   PetscFunctionBegin;
4489   PetscCall(MatGetLocalSize(A,&m,NULL));
4490   PetscCall(MatGetSize(A,NULL,&N));
4491   /* Should this be the type of the diagonal block of A? */
4492   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4493   PetscCall(MatSetSizes(B,m,N,m,N));
4494   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4495   PetscCall(MatSetType(B,MATSEQAIJ));
4496   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4497   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4498   for (i=0; i<m; i++) {
4499     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4500     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4501     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4502   }
4503   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4504   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4505 
4506   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4507   PetscCall(PetscStrlen(outfile,&len));
4508   PetscCall(PetscMalloc1(len+6,&name));
4509   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4510   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4511   PetscCall(PetscFree(name));
4512   PetscCall(MatView(B,out));
4513   PetscCall(PetscViewerDestroy(&out));
4514   PetscCall(MatDestroy(&B));
4515   PetscFunctionReturn(0);
4516 }
4517 
/* Destroy callback for the "MatMergeSeqsToMPI" container attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); releases all merge scratch data.
   data may be NULL, in which case nothing is done. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri[0]/buf_rj[0] point to the single contiguous buffer backing all
     received messages (allocated by PetscPostIrecvInt()), so only the first
     entry is freed before the array of pointers itself */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4540 
4541 #include <../src/mat/utils/freespace.h>
4542 #include <petscbt.h>
4543 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the parallel matrix
   mpimat (created by MatCreateMPIAIJSumSeqAIJSymbolic()) with the sum of
   the per-process sequential matrices seqmat.  The merge bookkeeping (row
   layout, message lengths, received ij-structures) is recovered from the
   "MatMergeSeqsToMPI" container composed with mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* recover the merge context created by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* ship the values of all rows owned by [proc] as one contiguous message */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* bj_i contains aj as a (sorted) subsequence: walk bj_i, picking off matches */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] is the contiguous receive buffer backing all messages */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4663 
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero
   structure of the parallel sum of the per-process sequential matrices
   seqmat and creates the (still unassembled) MATMPIAIJ matrix *mpimat.
   The merge bookkeeping required by MatCreateMPIAIJSumSeqAIJNumeric() is
   attached to the new matrix in a "MatMergeSeqsToMPI" container. */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* nothing is sent to self */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (matrix entries) to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are shipped in the i-structure */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4914 
4915 /*@C
4916       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4917                  matrices from each processor
4918 
4919     Collective
4920 
4921    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4924 .    m - number of local rows (or PETSC_DECIDE)
4925 .    n - number of local columns (or PETSC_DECIDE)
4926 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 
4928    Output Parameter:
4929 .    mpimat - the parallel matrix generated
4930 
4931     Level: advanced
4932 
4933    Notes:
4934      The dimensions of the sequential matrix in each processor MUST be the same.
4935      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4936      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4937 @*/
4938 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4939 {
4940   PetscMPIInt    size;
4941 
4942   PetscFunctionBegin;
4943   PetscCallMPI(MPI_Comm_size(comm,&size));
4944   if (size == 1) {
4945     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4946     if (scall == MAT_INITIAL_MATRIX) {
4947       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4948     } else {
4949       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4950     }
4951     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4952     PetscFunctionReturn(0);
4953   }
4954   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4955   if (scall == MAT_INITIAL_MATRIX) {
4956     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4957   }
4958   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4959   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4960   PetscFunctionReturn(0);
4961 }
4962 
4963 /*@
4964      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4965           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4966           with MatGetSize()
4967 
4968     Not Collective
4969 
4970    Input Parameters:
4971 +    A - the matrix
4972 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4973 
4974    Output Parameter:
4975 .    A_loc - the local sequential matrix generated
4976 
4977     Level: developer
4978 
4979    Notes:
4980      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4981      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4982      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4983      modify the values of the returned A_loc.
4984 
4985 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4986 @*/
/* Concatenate, row by row, the diagonal (A) and off-diagonal (B) blocks of
   an MPIAIJ matrix into one sequential m x N matrix in global column
   numbering.  Within each row the merged column indices stay sorted:
   off-diagonal columns below cstart come first, then the diagonal block,
   then the remaining off-diagonal columns. */
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* one process: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: row i holds all of A's and B's row i */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj]; /* translate B's compressed column to global numbering */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure is unchanged: only refill the values array in the same order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5087 
5088 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5091 
5092     Not Collective
5093 
5094    Input Parameters:
5095 +    A - the matrix
5096 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5097 
5098    Output Parameters:
5099 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5100 -    A_loc - the local sequential matrix generated
5101 
5102     Level: developer
5103 
5104    Notes:
5105      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5106 
5107 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5108 
5109 @*/
5110 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5111 {
5112   Mat            Ao,Ad;
5113   const PetscInt *cmap;
5114   PetscMPIInt    size;
5115   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5116 
5117   PetscFunctionBegin;
5118   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5119   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5120   if (size == 1) {
5121     if (scall == MAT_INITIAL_MATRIX) {
5122       PetscCall(PetscObjectReference((PetscObject)Ad));
5123       *A_loc = Ad;
5124     } else if (scall == MAT_REUSE_MATRIX) {
5125       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5126     }
5127     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5128     PetscFunctionReturn(0);
5129   }
5130   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5131   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5132   if (f) {
5133     PetscCall((*f)(A,scall,glob,A_loc));
5134   } else {
5135     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5136     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5137     Mat_SeqAIJ        *c;
5138     PetscInt          *ai = a->i, *aj = a->j;
5139     PetscInt          *bi = b->i, *bj = b->j;
5140     PetscInt          *ci,*cj;
5141     const PetscScalar *aa,*ba;
5142     PetscScalar       *ca;
5143     PetscInt          i,j,am,dn,on;
5144 
5145     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5146     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5147     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5148     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5149     if (scall == MAT_INITIAL_MATRIX) {
5150       PetscInt k;
5151       PetscCall(PetscMalloc1(1+am,&ci));
5152       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5153       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5154       ci[0] = 0;
5155       for (i=0,k=0; i<am; i++) {
5156         const PetscInt ncols_o = bi[i+1] - bi[i];
5157         const PetscInt ncols_d = ai[i+1] - ai[i];
5158         ci[i+1] = ci[i] + ncols_o + ncols_d;
5159         /* diagonal portion of A */
5160         for (j=0; j<ncols_d; j++,k++) {
5161           cj[k] = *aj++;
5162           ca[k] = *aa++;
5163         }
5164         /* off-diagonal portion of A */
5165         for (j=0; j<ncols_o; j++,k++) {
5166           cj[k] = dn + *bj++;
5167           ca[k] = *ba++;
5168         }
5169       }
5170       /* put together the new matrix */
5171       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5172       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5173       /* Since these are PETSc arrays, change flags to free them as necessary. */
5174       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5175       c->free_a  = PETSC_TRUE;
5176       c->free_ij = PETSC_TRUE;
5177       c->nonew   = 0;
5178       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5179     } else if (scall == MAT_REUSE_MATRIX) {
5180       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5181       for (i=0; i<am; i++) {
5182         const PetscInt ncols_d = ai[i+1] - ai[i];
5183         const PetscInt ncols_o = bi[i+1] - bi[i];
5184         /* diagonal portion of A */
5185         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5186         /* off-diagonal portion of A */
5187         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5188       }
5189       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5190     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5191     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5192     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5193     if (glob) {
5194       PetscInt cst, *gidx;
5195 
5196       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5197       PetscCall(PetscMalloc1(dn+on,&gidx));
5198       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5199       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5200       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5201     }
5202   }
5203   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5209 
5210     Not Collective
5211 
5212    Input Parameters:
5213 +    A - the matrix
5214 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5215 -    row, col - index sets of rows and columns to extract (or NULL)
5216 
5217    Output Parameter:
5218 .    A_loc - the local sequential matrix generated
5219 
5220     Level: developer
5221 
5222 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5223 
5224 @*/
5225 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5226 {
5227   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5228   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5229   IS             isrowa,iscola;
5230   Mat            *aloc;
5231   PetscBool      match;
5232 
5233   PetscFunctionBegin;
5234   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5235   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5236   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5237   if (!row) {
5238     start = A->rmap->rstart; end = A->rmap->rend;
5239     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5240   } else {
5241     isrowa = *row;
5242   }
5243   if (!col) {
5244     start = A->cmap->rstart;
5245     cmap  = a->garray;
5246     nzA   = a->A->cmap->n;
5247     nzB   = a->B->cmap->n;
5248     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5249     ncols = 0;
5250     for (i=0; i<nzB; i++) {
5251       if (cmap[i] < start) idx[ncols++] = cmap[i];
5252       else break;
5253     }
5254     imark = i;
5255     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5256     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5257     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5258   } else {
5259     iscola = *col;
5260   }
5261   if (scall != MAT_INITIAL_MATRIX) {
5262     PetscCall(PetscMalloc1(1,&aloc));
5263     aloc[0] = *A_loc;
5264   }
5265   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5266   if (!col) { /* attach global id of condensed columns */
5267     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5268   }
5269   *A_loc = aloc[0];
5270   PetscCall(PetscFree(aloc));
5271   if (!row) {
5272     PetscCall(ISDestroy(&isrowa));
5273   }
5274   if (!col) {
5275     PetscCall(ISDestroy(&iscola));
5276   }
5277   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5278   PetscFunctionReturn(0);
5279 }
5280 
/*
 * Create a sequential AIJ matrix based on row indices; all the columns of a row are extracted once the row is matched.
 * Rows may be local or remote. The routine is designed to be scalable in memory, so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root data: interleaved (diag count, off-diag count) pairs and their running offsets */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row within pd/po */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf; each broadcast moves one (diag,offdiag) pair per row */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol); /* ncol only needs to be an upper bound for the column count */
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build entry-level SF graphs: one leaf per nonzero of P_oth, split by diag/off-diag origin */
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory;
   * the scalar broadcasts are overlapped with the index conversion below */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (pd->j is mutated in place, restored below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices as well; every global index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5454 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;  /* when dof > 1 (MAIJ), several columns collapse onto one key */
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' maps to the same key as the previous step (garray is sorted) */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* extract and sort the unique keys; these are the rows of P we need */
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    /* attach the off-diag-column -> P_oth-row mapping for later reuse */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5532 
5533 /*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5535 
5536   Collective on Mat
5537 
5538   Input Parameters:
5539 + A - the first matrix in mpiaij format
5540 . B - the second matrix in mpiaij format
5541 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5542 
5543   Output Parameters:
5544 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5545 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5546 - B_seq - the sequential matrix generated
5547 
5548   Level: developer
5549 
5550 @*/
5551 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5552 {
5553   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5554   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5555   IS             isrowb,iscolb;
5556   Mat            *bseq=NULL;
5557 
5558   PetscFunctionBegin;
5559   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5560     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5561   }
5562   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5563 
5564   if (scall == MAT_INITIAL_MATRIX) {
5565     start = A->cmap->rstart;
5566     cmap  = a->garray;
5567     nzA   = a->A->cmap->n;
5568     nzB   = a->B->cmap->n;
5569     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5570     ncols = 0;
5571     for (i=0; i<nzB; i++) {  /* row < local row index */
5572       if (cmap[i] < start) idx[ncols++] = cmap[i];
5573       else break;
5574     }
5575     imark = i;
5576     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5577     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5578     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5579     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5580   } else {
5581     PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5582     isrowb  = *rowb; iscolb = *colb;
5583     PetscCall(PetscMalloc1(1,&bseq));
5584     bseq[0] = *B_seq;
5585   }
5586   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5587   *B_seq = bseq[0];
5588   PetscCall(PetscFree(bseq));
5589   if (!rowb) {
5590     PetscCall(ISDestroy(&isrowb));
5591   } else {
5592     *rowb = isrowb;
5593   }
5594   if (!colb) {
5595     PetscCall(ISDestroy(&iscolb));
5596   } else {
5597     *colb = iscolb;
5598   }
5599   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 /*
5604     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5605     of the OFF-DIAGONAL portion of local A
5606 
5607     Collective on Mat
5608 
5609    Input Parameters:
5610 +    A,B - the matrices in mpiaij format
5611 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5612 
5613    Output Parameter:
5614 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5615 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5616 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5617 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5618 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5621 
5622     Level: developer
5623 
5624 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;
  const PetscInt         *srow,*rstarts,*sstarts;
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* NOTE(review): these assign the local parameter copies, not *startsj_s / *bufa_ptr;
     * if the intent was to null out the caller's output pointers this is a no-op — confirm */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* reuse the Mvctx scatter's communication pattern to learn which rows to exchange */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  rwaits = reqs;
  swaits = reqs + nrecvs;

  /* without the saved offset/buffer arrays, a full rebuild is required */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j],len,&len)); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* reuse the saved offsets and send buffer; only the values (a-array) are exchanged below */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj,rstartsj));
      /* NOTE(review): this frees the handle bufa_ptr (which is NULL on this path), not the
       * bufa buffer allocated above — bufa appears to leak here; confirm whether
       * PetscFree(bufa) was intended */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* hand the offsets and value buffer back to the caller for MAT_REUSE_MATRIX calls */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}
5832 
5833 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5834 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5835 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5836 #if defined(PETSC_HAVE_MKL_SPARSE)
5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5838 #endif
5839 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5841 #if defined(PETSC_HAVE_ELEMENTAL)
5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5843 #endif
5844 #if defined(PETSC_HAVE_SCALAPACK)
5845 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5846 #endif
5847 #if defined(PETSC_HAVE_HYPRE)
5848 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5849 #endif
5850 #if defined(PETSC_HAVE_CUDA)
5851 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5852 #endif
5853 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5855 #endif
5856 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5857 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5858 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5859 
5860 /*
5861     Computes (B'*A')' since computing B*A directly is untenable
5862 
5863                n                       p                          p
5864         [             ]       [             ]         [                 ]
5865       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5866         [             ]       [             ]         [                 ]
5867 
5868 */
5869 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5870 {
5871   Mat            At,Bt,Ct;
5872 
5873   PetscFunctionBegin;
5874   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5875   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5876   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5877   PetscCall(MatDestroy(&At));
5878   PetscCall(MatDestroy(&Bt));
5879   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5880   PetscCall(MatDestroy(&Ct));
5881   PetscFunctionReturn(0);
5882 }
5883 
5884 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5885 {
5886   PetscBool      cisdense;
5887 
5888   PetscFunctionBegin;
5889   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5890   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5891   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5892   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5893   if (!cisdense) {
5894     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5895   }
5896   PetscCall(MatSetUp(C));
5897 
5898   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5899   PetscFunctionReturn(0);
5900 }
5901 
5902 /* ----------------------------------------------------------------*/
5903 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5904 {
5905   Mat_Product *product = C->product;
5906   Mat         A = product->A,B=product->B;
5907 
5908   PetscFunctionBegin;
5909   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5910     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5911 
5912   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5913   C->ops->productsymbolic = MatProductSymbolic_AB;
5914   PetscFunctionReturn(0);
5915 }
5916 
5917 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5918 {
5919   Mat_Product    *product = C->product;
5920 
5921   PetscFunctionBegin;
5922   if (product->type == MATPRODUCT_AB) {
5923     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5924   }
5925   PetscFunctionReturn(0);
5926 }
5927 
5928 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5929    is greater than value, or last if there is no such element.
5930 */
5931 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5932 {
5933   PetscCount  it,step,count = last - first;
5934 
5935   PetscFunctionBegin;
5936   while (count > 0) {
5937     it   = first;
5938     step = count / 2;
5939     it  += step;
5940     if (!(value < array[it])) {
5941       first  = ++it;
5942       count -= step + 1;
5943     } else count = step;
5944   }
5945   *upper = first;
5946   PetscFunctionReturn(0);
5947 }
5948 
5949 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
5950 
5951   Input Parameters:
5952 
5953     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5954     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5955 
5956     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
5957 
5958     For Set1, j1[] contains column indices of the nonzeros.
5959     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5961     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5962 
5963     Similar for Set2.
5964 
5965     This routine merges the two sets of nonzeros row by row and removes repeats.
5966 
5967   Output Parameters: (memories are allocated by the caller)
5968 
5969     i[],j[]: the CSR of the merged matrix, which has m rows.
5970     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5971     imap2[]: similar to imap1[], but for Set2.
5972     Note we order nonzeros row-by-row and from left to right.
5973 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* Row index of mat; m = number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2; /* b1/e1, b2/e2 are begin/end cursors into j1[], j2[] for the current row */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged result respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-pointer merge of two sorted ranges; advancing a cursor by
       jmap[t+1]-jmap[t] skips over all repeats of the current unique entry at once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Entry only exists in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Entry only exists in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: row r of the merged matrix holds unique entries [i[r], i[r+1]) */
  }
  PetscFunctionReturn(0);
}
6027 
6028 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6029 
6030   Input Parameters:
6031     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6032     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6033       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6034 
6035       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6036       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6037 
6038   Output Parameters:
6039     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6040     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6041       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6042       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6043 
6044     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6045       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6046         repeats (i.e., same 'i,j' pair).
6047       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6048         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6049 
6050       Atot: number of entries belonging to the diagonal block
6051       Annz: number of unique nonzeros belonging to the diagonal block.
6052 
6053     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6054 
6055     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6056 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): upper bound allows j[p] == mat->cmap->N, but valid column indices are [0,N-1] — confirm intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col); /* Skip repeats of this offdiag column */
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets for the second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* p-q = number of repeats of this unique diag entry */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); /* p-q = number of repeats of this unique offdiag entry */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6153 
6154 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6155 {
6156   MPI_Comm                  comm;
6157   PetscMPIInt               rank,size;
6158   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6159   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6160   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6161 
6162   PetscFunctionBegin;
6163   PetscCall(PetscFree(mpiaij->garray));
6164   PetscCall(VecDestroy(&mpiaij->lvec));
6165 #if defined(PETSC_USE_CTABLE)
6166   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6167 #else
6168   PetscCall(PetscFree(mpiaij->colmap));
6169 #endif
6170   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6171   mat->assembled = PETSC_FALSE;
6172   mat->was_assembled = PETSC_FALSE;
6173   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6174 
6175   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6176   PetscCallMPI(MPI_Comm_size(comm,&size));
6177   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6178   PetscCall(PetscLayoutSetUp(mat->rmap));
6179   PetscCall(PetscLayoutSetUp(mat->cmap));
6180   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6181   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6182   PetscCall(MatGetLocalSize(mat,&m,&n));
6183   PetscCall(MatGetSize(mat,&M,&N));
6184 
6185   /* ---------------------------------------------------------------------------*/
6186   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6187   /* entries come first, then local rows, then remote rows.                     */
6188   /* ---------------------------------------------------------------------------*/
6189   PetscCount n1 = coo_n,*perm1;
6190   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6191   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6192   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6193   PetscCall(PetscArraycpy(j1,coo_j,n1));
6194   for (k=0; k<n1; k++) perm1[k] = k;
6195 
6196   /* Manipulate indices so that entries with negative row or col indices will have smallest
6197      row indices, local entries will have greater but negative row indices, and remote entries
6198      will have positive row indices.
6199   */
6200   for (k=0; k<n1; k++) {
6201     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6202     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6203     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6204     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6205   }
6206 
6207   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6208   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6209   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6210   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6211   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6212 
6213   /* ---------------------------------------------------------------------------*/
6214   /*           Split local rows into diag/offdiag portions                      */
6215   /* ---------------------------------------------------------------------------*/
6216   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6217   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6218   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6219 
6220   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6221   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6222   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6223 
6224   /* ---------------------------------------------------------------------------*/
6225   /*           Send remote rows to their owner                                  */
6226   /* ---------------------------------------------------------------------------*/
6227   /* Find which rows should be sent to which remote ranks*/
6228   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6229   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6230   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6231   const PetscInt *ranges;
6232   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6233 
6234   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6235   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6236   for (k=rem; k<n1;) {
6237     PetscMPIInt  owner;
6238     PetscInt     firstRow,lastRow;
6239 
6240     /* Locate a row range */
6241     firstRow = i1[k]; /* first row of this owner */
6242     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6243     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6244 
6245     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6246     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6247 
6248     /* All entries in [k,p) belong to this remote owner */
6249     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6250       PetscMPIInt *sendto2;
6251       PetscInt    *nentries2;
6252       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6253 
6254       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6255       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6256       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6257       PetscCall(PetscFree2(sendto,nentries2));
6258       sendto      = sendto2;
6259       nentries    = nentries2;
6260       maxNsend    = maxNsend2;
6261     }
6262     sendto[nsend]   = owner;
6263     nentries[nsend] = p - k;
6264     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6265     nsend++;
6266     k = p;
6267   }
6268 
6269   /* Build 1st SF to know offsets on remote to send data */
6270   PetscSF     sf1;
6271   PetscInt    nroots = 1,nroots2 = 0;
6272   PetscInt    nleaves = nsend,nleaves2 = 0;
6273   PetscInt    *offsets;
6274   PetscSFNode *iremote;
6275 
6276   PetscCall(PetscSFCreate(comm,&sf1));
6277   PetscCall(PetscMalloc1(nsend,&iremote));
6278   PetscCall(PetscMalloc1(nsend,&offsets));
6279   for (k=0; k<nsend; k++) {
6280     iremote[k].rank  = sendto[k];
6281     iremote[k].index = 0;
6282     nleaves2        += nentries[k];
6283     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6284   }
6285   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6286   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6287   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6288   PetscCall(PetscSFDestroy(&sf1));
6289   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6290 
6291   /* Build 2nd SF to send remote COOs to their owner */
6292   PetscSF sf2;
6293   nroots  = nroots2;
6294   nleaves = nleaves2;
6295   PetscCall(PetscSFCreate(comm,&sf2));
6296   PetscCall(PetscSFSetFromOptions(sf2));
6297   PetscCall(PetscMalloc1(nleaves,&iremote));
6298   p       = 0;
6299   for (k=0; k<nsend; k++) {
6300     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6301     for (q=0; q<nentries[k]; q++,p++) {
6302       iremote[p].rank  = sendto[k];
6303       iremote[p].index = offsets[k] + q;
6304     }
6305   }
6306   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6307 
6308   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6309   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6310 
6311   /* Send the remote COOs to their owner */
6312   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6313   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6314   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6315   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6316   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6317   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6318   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6319 
6320   PetscCall(PetscFree(offsets));
6321   PetscCall(PetscFree2(sendto,nentries));
6322 
6323   /* ---------------------------------------------------------------*/
6324   /* Sort received COOs by row along with the permutation array     */
6325   /* ---------------------------------------------------------------*/
6326   for (k=0; k<n2; k++) perm2[k] = k;
6327   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6328 
6329   /* ---------------------------------------------------------------*/
6330   /* Split received COOs into diag/offdiag portions                 */
6331   /* ---------------------------------------------------------------*/
6332   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6333   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6334   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6335 
6336   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6337   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6338 
6339   /* --------------------------------------------------------------------------*/
6340   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6341   /* --------------------------------------------------------------------------*/
6342   PetscInt   *Ai,*Bi;
6343   PetscInt   *Aj,*Bj;
6344 
6345   PetscCall(PetscMalloc1(m+1,&Ai));
6346   PetscCall(PetscMalloc1(m+1,&Bi));
6347   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6348   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6349 
6350   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6351   PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2));
6352 
6353   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6354   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6355   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6356   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6357   PetscCall(PetscFree3(i1,j1,perm1));
6358   PetscCall(PetscFree3(i2,j2,perm2));
6359 
6360   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6361   PetscInt Annz = Ai[m];
6362   PetscInt Bnnz = Bi[m];
6363   if (Annz < Annz1 + Annz2) {
6364     PetscInt *Aj_new;
6365     PetscCall(PetscMalloc1(Annz,&Aj_new));
6366     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6367     PetscCall(PetscFree(Aj));
6368     Aj   = Aj_new;
6369   }
6370 
6371   if (Bnnz < Bnnz1 + Bnnz2) {
6372     PetscInt *Bj_new;
6373     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6374     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6375     PetscCall(PetscFree(Bj));
6376     Bj   = Bj_new;
6377   }
6378 
6379   /* --------------------------------------------------------------------------------*/
6380   /* Create new submatrices for on-process and off-process coupling                  */
6381   /* --------------------------------------------------------------------------------*/
6382   PetscScalar   *Aa,*Ba;
6383   MatType       rtype;
6384   Mat_SeqAIJ    *a,*b;
6385   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6386   PetscCall(PetscCalloc1(Bnnz,&Ba));
6387   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6388   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6389   PetscCall(MatDestroy(&mpiaij->A));
6390   PetscCall(MatDestroy(&mpiaij->B));
6391   PetscCall(MatGetRootType_Private(mat,&rtype));
6392   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6393   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6394   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6395 
6396   a = (Mat_SeqAIJ*)mpiaij->A->data;
6397   b = (Mat_SeqAIJ*)mpiaij->B->data;
6398   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6399   a->free_a       = b->free_a       = PETSC_TRUE;
6400   a->free_ij      = b->free_ij      = PETSC_TRUE;
6401 
6402   /* conversion must happen AFTER multiply setup */
6403   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6404   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6405   PetscCall(VecDestroy(&mpiaij->lvec));
6406   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6407   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6408 
6409   mpiaij->coo_n   = coo_n;
6410   mpiaij->coo_sf  = sf2;
6411   mpiaij->sendlen = nleaves;
6412   mpiaij->recvlen = nroots;
6413 
6414   mpiaij->Annz1   = Annz1;
6415   mpiaij->Annz2   = Annz2;
6416   mpiaij->Bnnz1   = Bnnz1;
6417   mpiaij->Bnnz2   = Bnnz2;
6418 
6419   mpiaij->Atot1   = Atot1;
6420   mpiaij->Atot2   = Atot2;
6421   mpiaij->Btot1   = Btot1;
6422   mpiaij->Btot2   = Btot2;
6423 
6424   mpiaij->Aimap1  = Aimap1;
6425   mpiaij->Aimap2  = Aimap2;
6426   mpiaij->Bimap1  = Bimap1;
6427   mpiaij->Bimap2  = Bimap2;
6428 
6429   mpiaij->Ajmap1  = Ajmap1;
6430   mpiaij->Ajmap2  = Ajmap2;
6431   mpiaij->Bjmap1  = Bjmap1;
6432   mpiaij->Bjmap2  = Bjmap2;
6433 
6434   mpiaij->Aperm1  = Aperm1;
6435   mpiaij->Aperm2  = Aperm2;
6436   mpiaij->Bperm1  = Bperm1;
6437   mpiaij->Bperm2  = Bperm2;
6438 
6439   mpiaij->Cperm1  = Cperm1;
6440 
6441   /* Allocate in preallocation. If not used, it has zero cost on host */
6442   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6443   PetscFunctionReturn(0);
6444 }
6445 
/* Set the values of an MPIAIJ matrix whose nonzero pattern was given in COO format
   via MatSetPreallocationCOO_MPIAIJ().

   Input Parameters:
+  mat   - the MPIAIJ matrix
.  v     - the scalar values, in the same order as the (i,j) pairs passed at preallocation
-  imode - INSERT_VALUES (matrix is zeroed first) or ADD_VALUES

   Entries destined for other ranks are packed (via the Cperm1 permutation) and reduced
   over the PetscSF built at preallocation time; the communication is overlapped with
   the accumulation of locally owned entries. The *1 maps/permutations describe local
   COO entries, the *2 ones describe entries received from remote ranks.
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));
  if (imode == INSERT_VALUES) {
    /* INSERT semantics: clear both blocks, then accumulate as if adding */
    PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar)));
    PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar)));
  }

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B: jmap1[i]..jmap1[i+1] are the COO entries (repeats
     allowed) that all land on the nonzero at offset imap1[i] of the block's data array */
  for (PetscCount i=0; i<Annz1; i++) {
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
  }
  for (PetscCount i=0; i<Bnnz1; i++) {
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B, indexing into recvbuf instead of v */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6492 
6493 /* ----------------------------------------------------------------*/
6494 
6495 /*MC
6496    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6497 
6498    Options Database Keys:
6499 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6500 
6501    Level: beginner
6502 
6503    Notes:
6504     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6505     in this case the values associated with the rows and columns one passes in are set to zero
6506     in the matrix
6507 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6510 
6511 .seealso: MatCreateAIJ()
6512 M*/
6513 
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function
   table, creates the stash used for off-process MatSetValues(), and composes the
   type-specific methods (preallocation, conversions, products, COO interface).
   The diagonal/off-diagonal sequential blocks are created later at preallocation. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built on demand */
  b->garray      = NULL; /* global column indices of the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose type-specific methods queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to other formats; device/back-end ones are guarded by configure-time flags */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6593 
6594 /*@C
6595      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6596          and "off-diagonal" part of the matrix in CSR format.
6597 
6598    Collective
6599 
6600    Input Parameters:
6601 +  comm - MPI communicator
6602 .  m - number of local rows (Cannot be PETSC_DECIDE)
6603 .  n - This value should be the same as the local size used in creating the
6604        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6605        calculated if N is given) For square matrices n is almost always m.
6606 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6607 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6608 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6609 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6610 .   a - matrix values
6611 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6612 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6613 -   oa - matrix values
6614 
6615    Output Parameter:
6616 .   mat - the matrix
6617 
6618    Level: advanced
6619 
6620    Notes:
6621        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6622        must free the arrays once the matrix has been destroyed and not before.
6623 
6624        The i and j indices are 0 based
6625 
6626        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6627 
6628        This sets local rows and cannot be used to set off-processor values.
6629 
6630        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6631        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6632        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6633        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6634        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6635        communication if it is known that only local entries will be set.
6636 
6637 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6638           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6639 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* validate the user-supplied CSR arrays; both row-offset arrays must start at 0 */
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the seq blocks are built directly below, so skip the normal preallocation path */
  (*mat)->preallocated = PETSC_TRUE;

  /* layouts must be set up before (*mat)->cmap->N is read below */
  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap (not copy) the user arrays: A holds the diagonal block (local column ids),
     B the off-diagonal block (global column ids, compacted during assembly) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* all entries are local by construction, so assembly needs no communication */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  /* the pattern is fixed by the user arrays; error on any new nonzero location */
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
6668 
/* Product data attached to C->product for the backend (device-friendly) implementation
   of MPIAIJ matrix products; the full result is assembled from a small set of
   sequential intermediate products whose values are scattered into C via its COO
   interface. Freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i]; own[i+1]-own[i] is their count */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i]; off[i+1]-off[i] is their count */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* merge diag/off-diag of B before multiplying (AB case) */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6699 
6700 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6701 {
6702   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6703   PetscInt            i;
6704 
6705   PetscFunctionBegin;
6706   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6707   PetscCall(PetscFree(mmdata->bufa));
6708   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6709   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6710   PetscCall(MatDestroy(&mmdata->P_oth));
6711   PetscCall(MatDestroy(&mmdata->Bloc));
6712   PetscCall(PetscSFDestroy(&mmdata->sf));
6713   for (i = 0; i < mmdata->cp; i++) {
6714     PetscCall(MatDestroy(&mmdata->mp[i]));
6715   }
6716   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6717   PetscCall(PetscFree(mmdata->own[0]));
6718   PetscCall(PetscFree(mmdata->own));
6719   PetscCall(PetscFree(mmdata->off[0]));
6720   PetscCall(PetscFree(mmdata->off));
6721   PetscCall(PetscFree(mmdata));
6722   PetscFunctionReturn(0);
6723 }
6724 
6725 /* Copy selected n entries with indices in idx[] of A to v[].
6726    If idx is NULL, copy the whole data array of A to v[]
6727  */
6728 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6729 {
6730   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6731 
6732   PetscFunctionBegin;
6733   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6734   if (f) {
6735     PetscCall((*f)(A,n,idx,v));
6736   } else {
6737     const PetscScalar *vv;
6738 
6739     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6740     if (n && idx) {
6741       PetscScalar    *w = v;
6742       const PetscInt *oi = idx;
6743       PetscInt       j;
6744 
6745       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6746     } else {
6747       PetscCall(PetscArraycpy(v,vv,n));
6748     }
6749     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6750   }
6751   PetscFunctionReturn(0);
6752 }
6753 
/* Numeric phase of the backend MPIAIJ matrix product: refresh the temporary
   matrices (unless the symbolic-phase values can be reused), run the numeric
   phase of each intermediate product, then gather their values into the COO
   buffers and insert them into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* symbolic-phase values can only be reused for the first numeric call */
  mmdata->reusesym = PETSC_FALSE;

  /* numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* copy values of each (non-temporary) intermediate product into the COO buffers;
     off[i+1]-off[i] and own[i+1]-own[i] are the off-/on-process entry counts of mp[i] */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products are consumed by later ones */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else { /* no off-process entries: copy the whole value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* ship coo_w to the owning ranks; received values land after the on-process ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6802 
6803 /* Support for Pt * A, A * P, or Pt * A * P */
6804 #define MAX_NUMBER_INTERMEDIATE 4
6805 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6806 {
6807   Mat_Product            *product = C->product;
6808   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6809   Mat_MPIAIJ             *a,*p;
6810   MatMatMPIAIJBACKEND    *mmdata;
6811   ISLocalToGlobalMapping P_oth_l2g = NULL;
6812   IS                     glob = NULL;
6813   const char             *prefix;
6814   char                   pprefix[256];
6815   const PetscInt         *globidx,*P_oth_idx;
6816   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6817   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6818   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6819                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6820                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6821   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6822 
6823   MatProductType         ptype;
6824   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6825   PetscMPIInt            size;
6826   PetscErrorCode         ierr;
6827 
6828   PetscFunctionBegin;
6829   MatCheckProduct(C,1);
6830   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6831   ptype = product->type;
6832   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6833     ptype = MATPRODUCT_AB;
6834     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6835   }
6836   switch (ptype) {
6837   case MATPRODUCT_AB:
6838     A = product->A;
6839     P = product->B;
6840     m = A->rmap->n;
6841     n = P->cmap->n;
6842     M = A->rmap->N;
6843     N = P->cmap->N;
6844     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6845     break;
6846   case MATPRODUCT_AtB:
6847     P = product->A;
6848     A = product->B;
6849     m = P->cmap->n;
6850     n = A->cmap->n;
6851     M = P->cmap->N;
6852     N = A->cmap->N;
6853     hasoffproc = PETSC_TRUE;
6854     break;
6855   case MATPRODUCT_PtAP:
6856     A = product->A;
6857     P = product->B;
6858     m = P->cmap->n;
6859     n = P->cmap->n;
6860     M = P->cmap->N;
6861     N = P->cmap->N;
6862     hasoffproc = PETSC_TRUE;
6863     break;
6864   default:
6865     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6866   }
6867   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6868   if (size == 1) hasoffproc = PETSC_FALSE;
6869 
6870   /* defaults */
6871   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6872     mp[i]    = NULL;
6873     mptmp[i] = PETSC_FALSE;
6874     rmapt[i] = -1;
6875     cmapt[i] = -1;
6876     rmapa[i] = NULL;
6877     cmapa[i] = NULL;
6878   }
6879 
6880   /* customization */
6881   PetscCall(PetscNew(&mmdata));
6882   mmdata->reusesym = product->api_user;
6883   if (ptype == MATPRODUCT_AB) {
6884     if (product->api_user) {
6885       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");PetscCall(ierr);
6886       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6887       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6888       ierr = PetscOptionsEnd();PetscCall(ierr);
6889     } else {
6890       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
6891       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6892       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6893       ierr = PetscOptionsEnd();PetscCall(ierr);
6894     }
6895   } else if (ptype == MATPRODUCT_PtAP) {
6896     if (product->api_user) {
6897       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");PetscCall(ierr);
6898       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6899       ierr = PetscOptionsEnd();PetscCall(ierr);
6900     } else {
6901       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
6902       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6903       ierr = PetscOptionsEnd();PetscCall(ierr);
6904     }
6905   }
6906   a = (Mat_MPIAIJ*)A->data;
6907   p = (Mat_MPIAIJ*)P->data;
6908   PetscCall(MatSetSizes(C,m,n,M,N));
6909   PetscCall(PetscLayoutSetUp(C->rmap));
6910   PetscCall(PetscLayoutSetUp(C->cmap));
6911   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6912   PetscCall(MatGetOptionsPrefix(C,&prefix));
6913 
6914   cp   = 0;
6915   switch (ptype) {
6916   case MATPRODUCT_AB: /* A * P */
6917     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6918 
6919     /* A_diag * P_local (merged or not) */
6920     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6921       /* P is product->B */
6922       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6923       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
6924       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6925       PetscCall(MatProductSetFill(mp[cp],product->fill));
6926       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6927       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6928       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6929       mp[cp]->product->api_user = product->api_user;
6930       PetscCall(MatProductSetFromOptions(mp[cp]));
6931       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6932       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6933       PetscCall(ISGetIndices(glob,&globidx));
6934       rmapt[cp] = 1;
6935       cmapt[cp] = 2;
6936       cmapa[cp] = globidx;
6937       mptmp[cp] = PETSC_FALSE;
6938       cp++;
6939     } else { /* A_diag * P_diag and A_diag * P_off */
6940       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
6941       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6942       PetscCall(MatProductSetFill(mp[cp],product->fill));
6943       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6944       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6945       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6946       mp[cp]->product->api_user = product->api_user;
6947       PetscCall(MatProductSetFromOptions(mp[cp]));
6948       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6949       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6950       rmapt[cp] = 1;
6951       cmapt[cp] = 1;
6952       mptmp[cp] = PETSC_FALSE;
6953       cp++;
6954       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
6955       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6956       PetscCall(MatProductSetFill(mp[cp],product->fill));
6957       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6958       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6959       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6960       mp[cp]->product->api_user = product->api_user;
6961       PetscCall(MatProductSetFromOptions(mp[cp]));
6962       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6963       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6964       rmapt[cp] = 1;
6965       cmapt[cp] = 2;
6966       cmapa[cp] = p->garray;
6967       mptmp[cp] = PETSC_FALSE;
6968       cp++;
6969     }
6970 
6971     /* A_off * P_other */
6972     if (mmdata->P_oth) {
6973       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
6974       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
6975       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
6976       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
6977       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
6978       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6979       PetscCall(MatProductSetFill(mp[cp],product->fill));
6980       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6981       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6982       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6983       mp[cp]->product->api_user = product->api_user;
6984       PetscCall(MatProductSetFromOptions(mp[cp]));
6985       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6986       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6987       rmapt[cp] = 1;
6988       cmapt[cp] = 2;
6989       cmapa[cp] = P_oth_idx;
6990       mptmp[cp] = PETSC_FALSE;
6991       cp++;
6992     }
6993     break;
6994 
6995   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6996     /* A is product->B */
6997     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6998     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6999       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7000       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7001       PetscCall(MatProductSetFill(mp[cp],product->fill));
7002       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7003       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7004       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7005       mp[cp]->product->api_user = product->api_user;
7006       PetscCall(MatProductSetFromOptions(mp[cp]));
7007       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7008       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7009       PetscCall(ISGetIndices(glob,&globidx));
7010       rmapt[cp] = 2;
7011       rmapa[cp] = globidx;
7012       cmapt[cp] = 2;
7013       cmapa[cp] = globidx;
7014       mptmp[cp] = PETSC_FALSE;
7015       cp++;
7016     } else {
7017       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7018       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7019       PetscCall(MatProductSetFill(mp[cp],product->fill));
7020       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7021       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7022       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7023       mp[cp]->product->api_user = product->api_user;
7024       PetscCall(MatProductSetFromOptions(mp[cp]));
7025       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7026       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7027       PetscCall(ISGetIndices(glob,&globidx));
7028       rmapt[cp] = 1;
7029       cmapt[cp] = 2;
7030       cmapa[cp] = globidx;
7031       mptmp[cp] = PETSC_FALSE;
7032       cp++;
7033       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7034       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7035       PetscCall(MatProductSetFill(mp[cp],product->fill));
7036       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7037       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7038       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7039       mp[cp]->product->api_user = product->api_user;
7040       PetscCall(MatProductSetFromOptions(mp[cp]));
7041       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7042       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7043       rmapt[cp] = 2;
7044       rmapa[cp] = p->garray;
7045       cmapt[cp] = 2;
7046       cmapa[cp] = globidx;
7047       mptmp[cp] = PETSC_FALSE;
7048       cp++;
7049     }
7050     break;
7051   case MATPRODUCT_PtAP:
7052     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7053     /* P is product->B */
7054     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7055     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7056     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7057     PetscCall(MatProductSetFill(mp[cp],product->fill));
7058     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7059     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7060     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7061     mp[cp]->product->api_user = product->api_user;
7062     PetscCall(MatProductSetFromOptions(mp[cp]));
7063     PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7064     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7065     PetscCall(ISGetIndices(glob,&globidx));
7066     rmapt[cp] = 2;
7067     rmapa[cp] = globidx;
7068     cmapt[cp] = 2;
7069     cmapa[cp] = globidx;
7070     mptmp[cp] = PETSC_FALSE;
7071     cp++;
7072     if (mmdata->P_oth) {
7073       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7074       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7075       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7076       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7077       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7078       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7079       PetscCall(MatProductSetFill(mp[cp],product->fill));
7080       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7081       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7082       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7083       mp[cp]->product->api_user = product->api_user;
7084       PetscCall(MatProductSetFromOptions(mp[cp]));
7085       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7086       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7087       mptmp[cp] = PETSC_TRUE;
7088       cp++;
7089       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7090       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7091       PetscCall(MatProductSetFill(mp[cp],product->fill));
7092       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7093       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7094       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7095       mp[cp]->product->api_user = product->api_user;
7096       PetscCall(MatProductSetFromOptions(mp[cp]));
7097       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7098       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7099       rmapt[cp] = 2;
7100       rmapa[cp] = globidx;
7101       cmapt[cp] = 2;
7102       cmapa[cp] = P_oth_idx;
7103       mptmp[cp] = PETSC_FALSE;
7104       cp++;
7105     }
7106     break;
7107   default:
7108     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7109   }
7110   /* sanity check */
7111   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7112 
7113   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7114   for (i = 0; i < cp; i++) {
7115     mmdata->mp[i]    = mp[i];
7116     mmdata->mptmp[i] = mptmp[i];
7117   }
7118   mmdata->cp = cp;
7119   C->product->data       = mmdata;
7120   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7121   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7122 
7123   /* memory type */
7124   mmdata->mtype = PETSC_MEMTYPE_HOST;
7125   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7126   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7127   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7128   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7129 
7130   /* prepare coo coordinates for values insertion */
7131 
7132   /* count total nonzeros of those intermediate seqaij Mats
7133     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7134     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7135     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7136   */
7137   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7138     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7139     if (mptmp[cp]) continue;
7140     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7141       const PetscInt *rmap = rmapa[cp];
7142       const PetscInt mr = mp[cp]->rmap->n;
7143       const PetscInt rs = C->rmap->rstart;
7144       const PetscInt re = C->rmap->rend;
7145       const PetscInt *ii  = mm->i;
7146       for (i = 0; i < mr; i++) {
7147         const PetscInt gr = rmap[i];
7148         const PetscInt nz = ii[i+1] - ii[i];
7149         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7150         else ncoo_oown += nz; /* this row is local */
7151       }
7152     } else ncoo_d += mm->nz;
7153   }
7154 
7155   /*
7156     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7157 
    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7159 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7161 
7162     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7163     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7164     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7165 
7166     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7168   */
7169   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7170   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7171 
7172   /* gather (i,j) of nonzeros inserted by remote procs */
7173   if (hasoffproc) {
7174     PetscSF  msf;
7175     PetscInt ncoo2,*coo_i2,*coo_j2;
7176 
7177     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7178     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7179     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7180 
7181     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7182       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7183       PetscInt   *idxoff = mmdata->off[cp];
7184       PetscInt   *idxown = mmdata->own[cp];
7185       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7186         const PetscInt *rmap = rmapa[cp];
7187         const PetscInt *cmap = cmapa[cp];
7188         const PetscInt *ii  = mm->i;
7189         PetscInt       *coi = coo_i + ncoo_o;
7190         PetscInt       *coj = coo_j + ncoo_o;
7191         const PetscInt mr = mp[cp]->rmap->n;
7192         const PetscInt rs = C->rmap->rstart;
7193         const PetscInt re = C->rmap->rend;
7194         const PetscInt cs = C->cmap->rstart;
7195         for (i = 0; i < mr; i++) {
7196           const PetscInt *jj = mm->j + ii[i];
7197           const PetscInt gr  = rmap[i];
7198           const PetscInt nz  = ii[i+1] - ii[i];
7199           if (gr < rs || gr >= re) { /* this is an offproc row */
7200             for (j = ii[i]; j < ii[i+1]; j++) {
7201               *coi++ = gr;
7202               *idxoff++ = j;
7203             }
7204             if (!cmapt[cp]) { /* already global */
7205               for (j = 0; j < nz; j++) *coj++ = jj[j];
7206             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7207               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7208             } else { /* offdiag */
7209               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7210             }
7211             ncoo_o += nz;
7212           } else { /* this is a local row */
7213             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7214           }
7215         }
7216       }
7217       mmdata->off[cp + 1] = idxoff;
7218       mmdata->own[cp + 1] = idxown;
7219     }
7220 
7221     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7222     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7223     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7224     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7225     ncoo = ncoo_d + ncoo_oown + ncoo2;
7226     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7227     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7228     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7229     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7230     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7231     PetscCall(PetscFree2(coo_i,coo_j));
7232     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7233     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7234     coo_i = coo_i2;
7235     coo_j = coo_j2;
7236   } else { /* no offproc values insertion */
7237     ncoo = ncoo_d;
7238     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7239 
7240     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7241     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7242     PetscCall(PetscSFSetUp(mmdata->sf));
7243   }
7244   mmdata->hasoffproc = hasoffproc;
7245 
7246   /* gather (i,j) of nonzeros inserted locally */
7247   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7248     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7249     PetscInt       *coi = coo_i + ncoo_d;
7250     PetscInt       *coj = coo_j + ncoo_d;
7251     const PetscInt *jj  = mm->j;
7252     const PetscInt *ii  = mm->i;
7253     const PetscInt *cmap = cmapa[cp];
7254     const PetscInt *rmap = rmapa[cp];
7255     const PetscInt mr = mp[cp]->rmap->n;
7256     const PetscInt rs = C->rmap->rstart;
7257     const PetscInt re = C->rmap->rend;
7258     const PetscInt cs = C->cmap->rstart;
7259 
7260     if (mptmp[cp]) continue;
7261     if (rmapt[cp] == 1) { /* consecutive rows */
7262       /* fill coo_i */
7263       for (i = 0; i < mr; i++) {
7264         const PetscInt gr = i + rs;
7265         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7266       }
7267       /* fill coo_j */
7268       if (!cmapt[cp]) { /* type-0, already global */
7269         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7270       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7271         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7272       } else { /* type-2, local to global for sparse columns */
7273         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7274       }
7275       ncoo_d += mm->nz;
7276     } else if (rmapt[cp] == 2) { /* sparse rows */
7277       for (i = 0; i < mr; i++) {
7278         const PetscInt *jj = mm->j + ii[i];
7279         const PetscInt gr  = rmap[i];
7280         const PetscInt nz  = ii[i+1] - ii[i];
7281         if (gr >= rs && gr < re) { /* local rows */
7282           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7283           if (!cmapt[cp]) { /* type-0, already global */
7284             for (j = 0; j < nz; j++) *coj++ = jj[j];
7285           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7286             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7287           } else { /* type-2, local to global for sparse columns */
7288             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7289           }
7290           ncoo_d += nz;
7291         }
7292       }
7293     }
7294   }
7295   if (glob) {
7296     PetscCall(ISRestoreIndices(glob,&globidx));
7297   }
7298   PetscCall(ISDestroy(&glob));
7299   if (P_oth_l2g) {
7300     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7301   }
7302   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7303   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7304   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7305 
7306   /* preallocate with COO data */
7307   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7308   PetscCall(PetscFree2(coo_i,coo_j));
7309   PetscFunctionReturn(0);
7310 }
7311 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - Decide whether the backend (device-capable)
   symbolic product implementation should be used for this matrix product, and install
   it; otherwise fall back to the standard MPIAIJ product machinery.

   The backend path is selected only when A and B have the same matrix type and neither
   is bound to the CPU (device builds), and only for the product types AB, AtB, and PtAP.
   Per-product options (e.g. -matmatmult_backend_cpu) let the user force the CPU path.

   Input Parameter:
.  mat - the product matrix C; mat->product describes the requested product

   Note: each PetscOptionsBegin/End pair is checked through a temporary ierr rather
   than being wrapped in PetscCall(), since Begin/End are multi-statement macros that
   open and close an options scope.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  /* device builds: only match when A and B types agree (checked below) */
  PetscBool    match   = PETSC_FALSE;
  PetscBool    usecpu  = PETSC_FALSE;
#else
  /* host-only builds: always take the backend COO-based path */
  PetscBool    match   = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* the backend path requires both operands on the device and of identical type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    PetscErrorCode ierr;
    /* query the per-product-type CPU-fallback option; the option name differs
       depending on whether the user called the old API (MatMatMult etc.) or the
       MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    /* only these three product types have a backend symbolic implementation */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7385 
7386 /*
7387     Special version for direct calls from Fortran
7388 */
7389 #include <petsc/private/fortranimpl.h>
7390 
/* Change these macros so they can be used in a void function */
7392 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7393 #undef  PetscCall
7394 #define PetscCall(...) do {                                                                    \
7395     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7396     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7397       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7398       return;                                                                                  \
7399     }                                                                                          \
7400   } while (0)
7401 
7402 #undef SETERRQ
7403 #define SETERRQ(comm,ierr,...) do {                                                            \
7404     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7405     return;                                                                                    \
7406   } while (0)
7407 
7408 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7409 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7410 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7411 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7412 #else
7413 #endif
7414 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7415 {
7416   Mat          mat  = *mmat;
7417   PetscInt     m    = *mm, n = *mn;
7418   InsertMode   addv = *maddv;
7419   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7420   PetscScalar  value;
7421 
7422   MatCheckPreallocated(mat,1);
7423   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7424   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7425   {
7426     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7427     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7428     PetscBool roworiented = aij->roworiented;
7429 
7430     /* Some Variables required in the macro */
7431     Mat        A                    = aij->A;
7432     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7433     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7434     MatScalar  *aa;
7435     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7436     Mat        B                    = aij->B;
7437     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7438     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7439     MatScalar  *ba;
7440     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7441      * cannot use "#if defined" inside a macro. */
7442     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7443 
7444     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7445     PetscInt  nonew = a->nonew;
7446     MatScalar *ap1,*ap2;
7447 
7448     PetscFunctionBegin;
7449     PetscCall(MatSeqAIJGetArray(A,&aa));
7450     PetscCall(MatSeqAIJGetArray(B,&ba));
7451     for (i=0; i<m; i++) {
7452       if (im[i] < 0) continue;
7453       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7454       if (im[i] >= rstart && im[i] < rend) {
7455         row      = im[i] - rstart;
7456         lastcol1 = -1;
7457         rp1      = aj + ai[row];
7458         ap1      = aa + ai[row];
7459         rmax1    = aimax[row];
7460         nrow1    = ailen[row];
7461         low1     = 0;
7462         high1    = nrow1;
7463         lastcol2 = -1;
7464         rp2      = bj + bi[row];
7465         ap2      = ba + bi[row];
7466         rmax2    = bimax[row];
7467         nrow2    = bilen[row];
7468         low2     = 0;
7469         high2    = nrow2;
7470 
7471         for (j=0; j<n; j++) {
7472           if (roworiented) value = v[i*n+j];
7473           else value = v[i+j*m];
7474           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7475           if (in[j] >= cstart && in[j] < cend) {
7476             col = in[j] - cstart;
7477             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7478           } else if (in[j] < 0) continue;
7479           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7480             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7481             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7482           } else {
7483             if (mat->was_assembled) {
7484               if (!aij->colmap) {
7485                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7486               }
7487 #if defined(PETSC_USE_CTABLE)
7488               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
7489               col--;
7490 #else
7491               col = aij->colmap[in[j]] - 1;
7492 #endif
7493               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7494                 PetscCall(MatDisAssemble_MPIAIJ(mat));
7495                 col  =  in[j];
7496                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7497                 B        = aij->B;
7498                 b        = (Mat_SeqAIJ*)B->data;
7499                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7500                 rp2      = bj + bi[row];
7501                 ap2      = ba + bi[row];
7502                 rmax2    = bimax[row];
7503                 nrow2    = bilen[row];
7504                 low2     = 0;
7505                 high2    = nrow2;
7506                 bm       = aij->B->rmap->n;
7507                 ba       = b->a;
7508                 inserted = PETSC_FALSE;
7509               }
7510             } else col = in[j];
7511             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7512           }
7513         }
7514       } else if (!aij->donotstash) {
7515         if (roworiented) {
7516           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7517         } else {
7518           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7519         }
7520       }
7521     }
7522     PetscCall(MatSeqAIJRestoreArray(A,&aa));
7523     PetscCall(MatSeqAIJRestoreArray(B,&ba));
7524   }
7525   PetscFunctionReturnVoid();
7526 }
7527 /* Undefining these here since they were redefined from their original definition above! No
7528  * other PETSc functions should be defined past this point, as it is impossible to recover the
7529  * original definitions */
7530 #undef PetscCall
7531 #undef SETERRQ
7532