xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c3b5f7ba6bc5ce25a01a67bb37ba5d34b02bbbd7)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50 
51   PetscFunctionBegin;
52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53   A->boundtocpu = flg;
54 #endif
55   if (a->A) {
56     PetscCall(MatBindToCPU(a->A,flg));
57   }
58   if (a->B) {
59     PetscCall(MatBindToCPU(a->B,flg));
60   }
61 
62   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
63    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
64    * to differ from the parent matrix. */
65   if (a->lvec) {
66     PetscCall(VecBindToCPU(a->lvec,flg));
67   }
68   if (a->diag) {
69     PetscCall(VecBindToCPU(a->diag,flg));
70   }
71 
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
76 {
77   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
78 
79   PetscFunctionBegin;
80   if (mat->A) {
81     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
82     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
83   }
84   PetscFunctionReturn(0);
85 }
86 
/*
  MatFindNonzeroRows_MPIAIJ - Creates an index set of the global indices of the locally
  owned rows that contain at least one stored nonzero VALUE (checking both the diagonal
  block A and the off-diagonal block B).

  Output: *keptrows is left NULL when no process has an all-zero row, so callers can
  detect the "keep every row" case without examining an index set.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows; /* cnt counts locally zero rows */
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* First pass: count rows that are empty or whose stored values are all exactly zero */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* row has a nonzero in the off-diagonal block */
    }
    cnt++;
ok1:;
  }
  /* n0rows = total zero rows across all processes; if none, return with *keptrows == NULL */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect global indices of the locally nonzero rows (m - cnt of them) */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
155 
156 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
157 {
158   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
159   PetscBool         cong;
160 
161   PetscFunctionBegin;
162   PetscCall(MatHasCongruentLayouts(Y,&cong));
163   if (Y->assembled && cong) {
164     PetscCall(MatDiagonalSet(aij->A,D,is));
165   } else {
166     PetscCall(MatDiagonalSet_Default(Y,D,is));
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172 {
173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174   PetscInt       i,rstart,nrows,*rows;
175 
176   PetscFunctionBegin;
177   *zrows = NULL;
178   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
179   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
180   for (i=0; i<nrows; i++) rows[i] += rstart;
181   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182   PetscFunctionReturn(0);
183 }
184 
/*
  MatGetColumnReductions_MPIAIJ - Computes a reduction (1-, 2-, or infinity-norm, or
  sum/mean of real or imaginary parts) of every column of the parallel matrix.

  reductions must have length n = global number of columns; after the Allreduce the
  result is identical on every process.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray: local off-diag column -> global column */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work)); /* per-process partial results over ALL global columns */
  /* Get/restore the value arrays without using them; presumably this syncs any
     device-side values to the host before a_aij->a/b_aij->a are read directly
     below -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate |a^2| per column; the square root is taken after the Allreduce */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine partial results: MAX for the infinity norm, SUM for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* mean is taken over the global number of rows m */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
250 
251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252 {
253   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254   IS              sis,gis;
255   const PetscInt  *isis,*igis;
256   PetscInt        n,*iis,nsis,ngis,rstart,i;
257 
258   PetscFunctionBegin;
259   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
260   PetscCall(MatFindNonzeroRows(a->B,&gis));
261   PetscCall(ISGetSize(gis,&ngis));
262   PetscCall(ISGetSize(sis,&nsis));
263   PetscCall(ISGetIndices(sis,&isis));
264   PetscCall(ISGetIndices(gis,&igis));
265 
266   PetscCall(PetscMalloc1(ngis+nsis,&iis));
267   PetscCall(PetscArraycpy(iis,igis,ngis));
268   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
269   n    = ngis + nsis;
270   PetscCall(PetscSortRemoveDupsInt(&n,iis));
271   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
272   for (i=0; i<n; i++) iis[i] += rstart;
273   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274 
275   PetscCall(ISRestoreIndices(sis,&isis));
276   PetscCall(ISRestoreIndices(gis,&igis));
277   PetscCall(ISDestroy(&sis));
278   PetscCall(ISDestroy(&gis));
279   PetscFunctionReturn(0);
280 }
281 
/*
  Local utility routine that creates a mapping from the global column
  numbers to the local numbers in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
  a slightly higher hash-table access cost; without it, it is not scalable (each
  process stores an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i; /* number of local off-diagonal columns */

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* table maps (global col + 1) -> (local col + 1) so that 0 can mean "not present" */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense array of length N+1: entries store local col + 1; 0 (from calloc) means "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
308 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts/adds a single (value) at local (row,col) of
  the diagonal block A. Uses a cached search window [low1,high1) and lastcol1 to speed
  up repeated insertions with monotonically related column indices: a bisection narrows
  the window to <= 5 entries, then a linear scan finds the slot. If the slot does not
  exist: skip when nonew == 1 or when the value is an ignorable zero, error when
  nonew == -1, otherwise reallocate (MatSeqXAIJReallocateAIJ) and shift later entries
  up to make room. orow/ocol are the global indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
345 
/*
  MatSetValues_SeqAIJ_B_Private - Off-diagonal-block (B) counterpart of
  MatSetValues_SeqAIJ_A_Private: inserts/adds (value) at local (row,col) of B using the
  cached window [low2,high2) and lastcol2. Same slot-search and reallocation policy;
  note the ignore-zero shortcut here does NOT exempt diagonal entries (no row != col
  test) since B holds no diagonal of the global matrix.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
381 
/*
  MatSetValuesRow_MPIAIJ - Overwrites the stored values of one locally owned row.

  Input:
    row - GLOBAL row index, must lie in this process's ownership range
    v   - new values in storage order: off-diagonal entries whose global column
          precedes the diagonal block, then the diagonal-block entries, then the
          remaining off-diagonal entries

  Only numerical values change; the nonzero pattern is untouched.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL)); /* diag = rstart, used as the split point */
  row  = row - diag; /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break; /* garray maps local off-diag cols to global */
  }
  /* first l entries of the off-diagonal block lie left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
419 
420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
421 {
422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423   PetscScalar    value = 0.0;
424   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426   PetscBool      roworiented = aij->roworiented;
427 
428   /* Some Variables required in the macro */
429   Mat        A                    = aij->A;
430   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
431   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432   PetscBool  ignorezeroentries    = a->ignorezeroentries;
433   Mat        B                    = aij->B;
434   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436   MatScalar  *aa,*ba;
437   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
438   PetscInt   nonew;
439   MatScalar  *ap1,*ap2;
440 
441   PetscFunctionBegin;
442   PetscCall(MatSeqAIJGetArray(A,&aa));
443   PetscCall(MatSeqAIJGetArray(B,&ba));
444   for (i=0; i<m; i++) {
445     if (im[i] < 0) continue;
446     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
447     if (im[i] >= rstart && im[i] < rend) {
448       row      = im[i] - rstart;
449       lastcol1 = -1;
450       rp1      = aj + ai[row];
451       ap1      = aa + ai[row];
452       rmax1    = aimax[row];
453       nrow1    = ailen[row];
454       low1     = 0;
455       high1    = nrow1;
456       lastcol2 = -1;
457       rp2      = bj + bi[row];
458       ap2      = ba + bi[row];
459       rmax2    = bimax[row];
460       nrow2    = bilen[row];
461       low2     = 0;
462       high2    = nrow2;
463 
464       for (j=0; j<n; j++) {
465         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467         if (in[j] >= cstart && in[j] < cend) {
468           col   = in[j] - cstart;
469           nonew = a->nonew;
470           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471         } else if (in[j] < 0) continue;
472         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
477             }
478 #if defined(PETSC_USE_CTABLE)
479             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480             col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
485               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
486               col  =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B        = aij->B;
489               b        = (Mat_SeqAIJ*)B->data;
490               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba       = b->a;
499             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
500               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
501                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
502               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
503             }
504           } else col = in[j];
505           nonew = b->nonew;
506           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
507         }
508       }
509     } else {
510       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515         } else {
516           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
517         }
518       }
519     }
520   }
521   PetscCall(MatSeqAIJRestoreArray(A,&aa));
522   PetscCall(MatSeqAIJRestoreArray(B,&ba));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
528     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
530 */
531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532 {
533   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534   Mat            A           = aij->A; /* diagonal part of the matrix */
535   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539   PetscInt       *ailen      = a->ilen,*aj = a->j;
540   PetscInt       *bilen      = b->ilen,*bj = b->j;
541   PetscInt       am          = aij->A->rmap->n,j;
542   PetscInt       diag_so_far = 0,dnz;
543   PetscInt       offd_so_far = 0,onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j=0; j<am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
574 {
575   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
576   Mat            A      = aij->A; /* diagonal part of the matrix */
577   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
579   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
580   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
581   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
582   PetscInt       *ailen = a->ilen,*aj = a->j;
583   PetscInt       *bilen = b->ilen,*bj = b->j;
584   PetscInt       am     = aij->A->rmap->n,j;
585   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587   PetscScalar    *aa = a->a,*ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j=0; j<am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd = full_offd_i[j];
594     rowstart_diag = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag+dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd+onz_row] = mat_j[col];
604         ba[rowstart_offd+onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/*
  MatGetValues_MPIAIJ - Retrieves the values at the m-by-n positions (idxm x idxn)
  into v (row-major). Only rows owned by this process may be requested (off-process
  rows raise PETSC_ERR_SUP); negative row/column indices are skipped. Off-diagonal
  columns not present in this process's off-diagonal block come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column in the diagonal block: convert to local numbering */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* column in the off-diagonal block: translate global -> local via colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored locally in B -> the value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
652 
653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
662   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
663   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664   PetscFunctionReturn(0);
665 }
666 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of off-process entries
  into the local blocks, assembles the diagonal block A, keeps the assembled state of
  the off-diagonal block B consistent across ranks, performs the multiply setup on the
  first final assembly, assembles B, and updates the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive every message of stashed values and insert them locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* collective: every rank must take the same branch below */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the structures used for matrix-vector products */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* discard cached row work arrays and the cached diagonal; both are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
747 
748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
749 {
750   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
751 
752   PetscFunctionBegin;
753   PetscCall(MatZeroEntries(l->A));
754   PetscCall(MatZeroEntries(l->B));
755   PetscFunctionReturn(0);
756 }
757 
/*
  MatZeroRows_MPIAIJ - Zeros the globally numbered rows[] of an MPIAIJ matrix,
  optionally placing diag on the diagonal of each zeroed row and adjusting the
  right-hand side b so the solution retains the values prescribed in x.

  rows[] may reference rows owned by other ranks; MatZeroRowsMapLocal_Private()
  communicates them and returns the locally owned subset lrows[] (length len).
  Collective on the matrix communicator.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* set b_i = diag*x_i for each zeroed local row so x keeps its prescribed value */
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* record current nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry of each zeroed row lives in the diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the current nonew flags; they are temporarily cleared below to
       permit inserting diagonal entries at new locations */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows beyond the last global column have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original insertion-behavior flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the parallel state only if any rank's block pattern changed */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
831 
/*
  MatZeroRowsColumns_MPIAIJ - Zeros both the rows and columns given by the
  globally numbered rows[], optionally placing diag on the diagonal and fixing
  the right-hand side b to preserve the solution values given in x.

  Strategy: a PetscSF routes possibly off-rank row requests to their owners;
  the diagonal block is handled by MatZeroRowsColumns() on l->A, while the
  off-diagonal block l->B is masked column-by-column using a scattered
  0/1 indicator vector (lmask) over the ghost columns.
  Collective on the matrix communicator.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;  /* -1 marks "not requested"; reduction below overwrites requested rows */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  /* build a 0/1 indicator of zeroed rows and scatter it to the ghost (off-process column) ordering */
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost values of x are needed to fix b for the zeroed off-process columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps back to actual row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* entry sits in a zeroed column: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscInt       nt;
955   VecScatter     Mvctx = a->Mvctx;
956 
957   PetscFunctionBegin;
958   PetscCall(VecGetLocalSize(xx,&nt));
959   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
960   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
961   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
962   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
963   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970 
971   PetscFunctionBegin;
972   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   VecScatter     Mvctx = a->Mvctx;
980 
981   PetscFunctionBegin;
982   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
983   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
984   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
985   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   /* do nondiagonal part */
995   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996   /* do local part */
997   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
998   /* add partial results together */
999   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1000   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1001   PetscFunctionReturn(0);
1002 }
1003 
/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat
  to within tol.  First a cheap test on the diagonal blocks (reduced over
  all ranks); if that passes and there is more than one rank, the
  off-diagonal parts are compared via submatrix extraction.
  Collective on Amat's communicator.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* all ranks must agree before proceeding to the expensive test */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range [first,last) */
  /* NOTE(review): the allocation is sized with N but the second fill loop runs
     to M; this is consistent only when M == N (square matrix) -- confirm that
     callers guarantee square layouts here */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me): these should be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscFunctionBegin;
1048   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059   /* do local part */
1060   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1061   /* add partial results together */
1062   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1063   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   PetscCall(MatGetDiagonal(a->A,v));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085 
1086   PetscFunctionBegin;
1087   PetscCall(MatScale(a->A,aa));
1088   PetscCall(MatScale(a->B,aa));
1089   PetscFunctionReturn(0);
1090 }
1091 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* star forest used to communicate remotely-owned COO entries */
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* perm/jmap arrays for locally provided entries (suffix 1) ... */
  PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
  /* ... and for entries received from other ranks (suffix 2) */
  PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
  PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
  /* communication buffers for MatSetValuesCOO */
  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112 #if defined(PETSC_USE_LOG)
1113   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1114 #endif
1115   PetscCall(MatStashDestroy_Private(&mat->stash));
1116   PetscCall(VecDestroy(&aij->diag));
1117   PetscCall(MatDestroy(&aij->A));
1118   PetscCall(MatDestroy(&aij->B));
1119 #if defined(PETSC_USE_CTABLE)
1120   PetscCall(PetscTableDestroy(&aij->colmap));
1121 #else
1122   PetscCall(PetscFree(aij->colmap));
1123 #endif
1124   PetscCall(PetscFree(aij->garray));
1125   PetscCall(VecDestroy(&aij->lvec));
1126   PetscCall(VecScatterDestroy(&aij->Mvctx));
1127   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1128   PetscCall(PetscFree(aij->ld));
1129 
1130   /* Free COO */
1131   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1132 
1133   PetscCall(PetscFree(mat->data));
1134 
1135   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1136   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1137 
1138   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1139   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1140   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1141   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1142   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1143   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1144   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1145   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1146   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1147   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1148 #if defined(PETSC_HAVE_CUDA)
1149   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1150 #endif
1151 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1153 #endif
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1157 #endif
1158 #if defined(PETSC_HAVE_SCALAPACK)
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1160 #endif
1161 #if defined(PETSC_HAVE_HYPRE)
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1164 #endif
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1171 #if defined(PETSC_HAVE_MKL_SPARSE)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1173 #endif
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1183 {
1184   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1185   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1186   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1187   const PetscInt    *garray = aij->garray;
1188   const PetscScalar *aa,*ba;
1189   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1190   PetscInt          *rowlens;
1191   PetscInt          *colidxs;
1192   PetscScalar       *matvals;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(PetscViewerSetUp(viewer));
1196 
1197   M  = mat->rmap->N;
1198   N  = mat->cmap->N;
1199   m  = mat->rmap->n;
1200   rs = mat->rmap->rstart;
1201   cs = mat->cmap->rstart;
1202   nz = A->nz + B->nz;
1203 
1204   /* write matrix header */
1205   header[0] = MAT_FILE_CLASSID;
1206   header[1] = M; header[2] = N; header[3] = nz;
1207   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1208   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1209 
1210   /* fill in and store row lengths  */
1211   PetscCall(PetscMalloc1(m,&rowlens));
1212   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1213   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1214   PetscCall(PetscFree(rowlens));
1215 
1216   /* fill in and store column indices */
1217   PetscCall(PetscMalloc1(nz,&colidxs));
1218   for (cnt=0, i=0; i<m; i++) {
1219     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1220       if (garray[B->j[jb]] > cs) break;
1221       colidxs[cnt++] = garray[B->j[jb]];
1222     }
1223     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1224       colidxs[cnt++] = A->j[ja] + cs;
1225     for (; jb<B->i[i+1]; jb++)
1226       colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1235   PetscCall(PetscMalloc1(nz,&matvals));
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       matvals[cnt++] = aa[ja];
1243     for (; jb<B->i[i+1]; jb++)
1244       matvals[cnt++] = ba[jb];
1245   }
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1247   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1248   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1249   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1250   PetscCall(PetscFree(matvals));
1251 
1252   /* write block size option to the viewer's .info file */
1253   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Views an MPIAIJ matrix on ASCII, draw,
  binary or socket viewers.  Info-style ASCII formats and parallel binary are
  handled directly (early returns); every other case falls through to the
  bottom, where the entire matrix is gathered onto rank 0 and viewed there.
  Collective on the matrix communicator.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max nonzeros per rank across the communicator */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized statistics for both blocks and the scatter */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* default ASCII format falls through to the gather-to-rank-0 code below */
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block holds the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable -- the iascii case is fully
       handled by the first branch of this if/else chain; confirm before removing */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1387 {
1388   PetscBool      iascii,isdraw,issocket,isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1395   if (iascii || isdraw || isbinary || issocket) {
1396     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
/* MatSOR_MPIAIJ - Processor-local SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Each outer iteration scatters the latest off-process values of xx into mat->lvec,
   forms the corrected right-hand side bb1 = bb - B*x (B = off-diagonal block), and
   then runs the sequential SOR of the diagonal block mat->A on bb1.  Only "local"
   sweep variants (and Eisenstat) are supported; true global-ordering parallel SOR
   raises PETSC_ERR_SUP.

   matin  - the MPIAIJ matrix
   bb     - right-hand side
   omega  - relaxation factor
   flag   - MatSORType bits (local forward/backward/symmetric sweep, zero initial
            guess, Eisenstat, apply-upper)
   fshift - diagonal shift passed through to the sequential SOR
   its    - number of outer (parallel) iterations
   lits   - number of local iterations per sequential SOR call
   xx     - solution vector (input/output)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* work vector: bb corrected by off-process coupling */
  PetscBool      hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER acts only on the local diagonal block; no communication needed */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever at least one sweep uses an existing iterate of xx:
     more than one outer iteration, the ZERO_INITIAL_GUESS bit NOT set
     (that is what ~flag & SOR_ZERO_INITIAL_GUESS tests), or Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first sweep needs no off-process correction */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat trick: one backward sweep into xx, then a forward sweep on a
       modified rhs into xx1, and the two partial solutions are summed */
    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    /* lazily build and cache the global diagonal, needed for the scaling below */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero/indefinite-pivot flag detected by the sequential SOR */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1499 
/* MatPermute_MPIAIJ - Creates B = P*A*Q' for row permutation rowp and column permutation colp.

   Strategy: use star forests (PetscSF) to invert both permutations so each process
   learns the new global row/column index of everything it owns, count the permuted
   diagonal/off-diagonal nonzeros per destination row for preallocation, then move
   the values over with batched MatSetValues() calls.

   A    - input MPIAIJ matrix
   rowp - row permutation (as an IS of target global rows)
   colp - column permutation
   B    - output: the permuted matrix
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  /* reducing each global row index onto its permutation target yields the inverse map */
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go: broadcast the permuted destinations of the
     (compressed) off-diagonal columns from their owners */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros per (source-ordered) row, then forward
     those counts to the destination row owners for preallocation */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned non-NULL in this routine, so this destroy
     appears unreachable — confirm whether a parallel-colp conversion path was removed */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1605 
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscLogDouble isend[5],irecv[5];
1621 
1622   PetscFunctionBegin;
1623   info->block_size = 1.0;
1624   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1625 
1626   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1627   isend[3] = info->memory;  isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1630 
1631   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1632   isend[3] += info->memory;  isend[4] += info->mallocs;
1633   if (flag == MAT_LOCAL) {
1634     info->nz_used      = isend[0];
1635     info->nz_allocated = isend[1];
1636     info->nz_unneeded  = isend[2];
1637     info->memory       = isend[3];
1638     info->mallocs      = isend[4];
1639   } else if (flag == MAT_GLOBAL_MAX) {
1640     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1641 
1642     info->nz_used      = irecv[0];
1643     info->nz_allocated = irecv[1];
1644     info->nz_unneeded  = irecv[2];
1645     info->memory       = irecv[3];
1646     info->mallocs      = irecv[4];
1647   } else if (flag == MAT_GLOBAL_SUM) {
1648     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   }
1656   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1657   info->fill_ratio_needed = 0;
1658   info->factor_mallocs    = 0;
1659   PetscFunctionReturn(0);
1660 }
1661 
/* MatSetOption_MPIAIJ - Applies a MatOption to the parallel matrix, forwarding most
   options to both sequential blocks; a few are stored on the MPIAIJ struct itself
   or intentionally ignored. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that simply propagate to both the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally (affects MatSetValues interpretation) and propagated */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* stored on the parallel struct only; controls the stash used during assembly */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1712 
/* MatGetRow_MPIAIJ - Returns one locally owned row, merging the diagonal (A) and
   off-diagonal (B) blocks into a single row sorted by increasing global column.

   Because B's columns to the left of the diagonal block all have global index
   < cstart and A's occupy [cstart, cstart+n), the merge is: B entries before the
   diagonal range, then all A entries, then the remaining B entries.

   row - GLOBAL row number; must be owned by this process
   nz  - number of nonzeros in the row
   idx - optional: global column indices (points into cached workspace)
   v   - optional: values (points into cached workspace)

   Must be paired with MatRestoreRow_MPIAIJ(); only one row may be out at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request; column indices of B
     are still needed to merge values, hence pcB stays set when v is requested */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;   /* maps B's compressed column indices to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries with global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the value pass already found the split point */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  /* the sequential rows are released immediately; outputs point at our own workspace */
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1789 
1790 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1791 {
1792   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1796   aij->getrowactive = PETSC_FALSE;
1797   PetscFunctionReturn(0);
1798 }
1799 
/* MatNorm_MPIAIJ - Computes a matrix norm of the parallel matrix.

   Supported: NORM_FROBENIUS (sum of squares reduced over ranks), NORM_1 (max column
   sum; per-rank partial column sums are reduced elementwise), NORM_INFINITY (max row
   sum; local row sums span both blocks).  NORM_2 is not supported.  On one process
   everything is delegated to the sequential block. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both blocks, then Allreduce and take the root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp holds this rank's partial sum for EVERY global column (size N) */
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps B's compressed column index to the global column */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* each local row sum is complete (A and B hold the whole row), so only a max is reduced */
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1869 
/* MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   reuse == MAT_INITIAL_MATRIX : allocate B with preallocation computed by
       counting column occurrences of A (and SF-reducing the off-diagonal counts
       to the owning ranks).
   reuse == MAT_REUSE_MATRIX   : *matout already has the transposed pattern.
   reuse == MAT_INPLACE_MATRIX : build into a temporary and MatHeaderMerge into A.

   The diagonal block is transposed locally in one shot; the off-diagonal entries
   are communicated via MatSetValues with swapped row/column roles.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has the reversed layout: local size (na x ma), global (N x M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed column indices to global, then insert each CSR row of B
     as a COLUMN of the transpose (note swapped arguments to MatSetValues) */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: fold B's contents into A's header */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1957 
1958 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat            a    = aij->A,b = aij->B;
1962   PetscInt       s1,s2,s3;
1963 
1964   PetscFunctionBegin;
1965   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1966   if (rr) {
1967     PetscCall(VecGetLocalSize(rr,&s1));
1968     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1969     /* Overlap communication with computation. */
1970     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1971   }
1972   if (ll) {
1973     PetscCall(VecGetLocalSize(ll,&s1));
1974     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1975     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1976   }
1977   /* scale  the diagonal block */
1978   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1979 
1980   if (rr) {
1981     /* Do a scatter end and then right scale the off-diagonal block */
1982     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1983     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
/* MatSetUnfactored_MPIAIJ - Resets the factored state by delegating to the diagonal
   block; the off-diagonal block is not touched here. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}
1996 
1997 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1998 {
1999   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2000   Mat            a,b,c,d;
2001   PetscBool      flg;
2002 
2003   PetscFunctionBegin;
2004   a = matA->A; b = matA->B;
2005   c = matB->A; d = matB->B;
2006 
2007   PetscCall(MatEqual(a,c,&flg));
2008   if (flg) {
2009     PetscCall(MatEqual(b,d,&flg));
2010   }
2011   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2016 {
2017   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2018   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2019 
2020   PetscFunctionBegin;
2021   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2022   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2023     /* because of the column compression in the off-processor part of the matrix a->B,
2024        the number of columns in a->B and b->B may be different, hence we cannot call
2025        the MatCopy() directly on the two parts. If need be, we can provide a more
2026        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2027        then copying the submatrices */
2028     PetscCall(MatCopy_Basic(A,B,str));
2029   } else {
2030     PetscCall(MatCopy(a->A,b->A,str));
2031     PetscCall(MatCopy(a->B,b->B,str));
2032   }
2033   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2034   PetscFunctionReturn(0);
2035 }
2036 
/* MatSetUp_MPIAIJ - Finishes setup by triggering preallocation with default
   (PETSC_DEFAULT) per-row nonzero estimates for both blocks. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2043 
2044 /*
2045    Computes the number of nonzeros per row needed for preallocation when X and Y
2046    have different nonzero structure.
2047 */
2048 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2049 {
2050   PetscInt       i,j,k,nzx,nzy;
2051 
2052   PetscFunctionBegin;
2053   /* Set the number of nonzeros in the new matrix */
2054   for (i=0; i<m; i++) {
2055     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2056     nzx = xi[i+1] - xi[i];
2057     nzy = yi[i+1] - yi[i];
2058     nnz[i] = 0;
2059     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2060       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2061       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2062       nnz[i]++;
2063     }
2064     for (; k<nzy; k++) nnz[i]++;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided.
   NOTE(review): callers pass the sequential off-diagonal blocks, for which rmap->N equals the
   local row count rmap->n — confirm before reusing with any other matrices. */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}
2080 
/* MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN   : apply blockwise to the diagonal and off-diagonal parts.
   SUBSET_NONZERO_PATTERN : generic fallback (Y's pattern already covers X's).
   DIFFERENT_NONZERO_PATTERN: build a fresh matrix B whose preallocation is the
   union of both patterns, accumulate into it, then fold B into Y's header. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* NOTE(review): sizes use rmap->N of the sequential blocks; for these blocks
       that equals the local row count rmap->n — confirm */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    /* union preallocation: diagonal blocks share column numbering, off-diagonal
       blocks need their garray local-to-global maps */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* B replaces Y's contents while Y keeps its identity (header) */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2111 
2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2113 
2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2115 {
2116   PetscFunctionBegin;
2117   if (PetscDefined(USE_COMPLEX)) {
2118     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2119 
2120     PetscCall(MatConjugate_SeqAIJ(aij->A));
2121     PetscCall(MatConjugate_SeqAIJ(aij->B));
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
/* MatRealPart_MPIAIJ - Replaces each entry by its real part, applied to both
   sequential blocks. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}
2135 
/* MatImaginaryPart_MPIAIJ - Replaces each entry by its imaginary part, applied to
   both sequential blocks. */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}
2145 
2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2147 {
2148   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2149   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2150   PetscScalar       *va,*vv;
2151   Vec               vB,vA;
2152   const PetscScalar *vb;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2156   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2157 
2158   PetscCall(VecGetArrayWrite(vA,&va));
2159   if (idx) {
2160     for (i=0; i<m; i++) {
2161       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2162     }
2163   }
2164 
2165   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2166   PetscCall(PetscMalloc1(m,&idxb));
2167   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2168 
2169   PetscCall(VecGetArrayWrite(v,&vv));
2170   PetscCall(VecGetArrayRead(vB,&vb));
2171   for (i=0; i<m; i++) {
2172     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2173       vv[i] = vb[i];
2174       if (idx) idx[i] = a->garray[idxb[i]];
2175     } else {
2176       vv[i] = va[i];
2177       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2178         idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA,&vv));
2182   PetscCall(VecRestoreArrayWrite(vA,&va));
2183   PetscCall(VecRestoreArrayRead(vB,&vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(0);
2188 }
2189 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row of a MATMPIAIJ matrix, find the
  entry with the smallest absolute value over both the diagonal block (mat->A)
  and the off-diagonal block (mat->B).

  Output: v holds the per-row minimum-magnitude values; if idx is non-NULL it
  receives the global column index of each selected entry.

  Since B stores only explicit nonzeros, an implicit 0.0 in the off-diagonal
  part is always a candidate minimum; the bulk of the code locates the global
  column of the first such implicit zero ("hole") in each compressed row of B.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* defer entirely to the sequential implementation on the diagonal block,
       wrapping v's array so no copy is needed */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* this rank owns no columns: report 0.0 with sentinel index -1
         NOTE(review): entries of B (if any) are not examined in this branch;
         presumably B is empty when n == 0 -- confirm */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists whose magnitude cannot be beaten */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares the hole counter j against cstart -- verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2297 
/*
  MatGetRowMin_MPIAIJ - For each local row, find the entry with the smallest
  value (compared by real part) over both the diagonal block (mat->A) and the
  off-diagonal block (mat->B).

  Output: v holds the per-row minima; if idx is non-NULL it receives the global
  column index of each selected entry.

  Since B stores only explicit nonzeros, an implicit 0.0 in the off-diagonal
  part is a candidate minimum; much of the code locates the global column of
  the first such implicit zero ("hole") in each compressed row of B.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* defer entirely to the sequential implementation on the diagonal block */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* this rank owns no columns: report the PETSC_MAX_REAL sentinel with index -1
         NOTE(review): entries of B (if any) are not examined in this branch;
         presumably B is empty when n == 0 -- confirm */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists: the row minimum over B is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares the hole counter j against cstart -- verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller value (by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2405 
/*
  MatGetRowMax_MPIAIJ - For each local row, find the entry with the largest
  value (compared by real part) over both the diagonal block (mat->A) and the
  off-diagonal block (mat->B).

  Output: v holds the per-row maxima; if idx is non-NULL it receives the global
  column index of each selected entry.

  Since B stores only explicit nonzeros, an implicit 0.0 in the off-diagonal
  part is a candidate maximum; much of the code locates the global column of
  the first such implicit zero ("hole") in each compressed row of B.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* defer entirely to the sequential implementation on the diagonal block */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* this rank owns no columns: report the PETSC_MIN_REAL sentinel with index -1
         NOTE(review): entries of B (if any) are not examined in this branch;
         presumably B is empty when n == 0 -- confirm */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares the hole counter j against cstart -- verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a larger value (by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   Mat            *dummy;
2517 
2518   PetscFunctionBegin;
2519   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2520   *newmat = *dummy;
2521   PetscCall(PetscFree(dummy));
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2526 {
2527   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2528 
2529   PetscFunctionBegin;
2530   PetscCall(MatInvertBlockDiagonal(a->A,values));
2531   A->factorerrortype = a->A->factorerrortype;
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2536 {
2537   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2538 
2539   PetscFunctionBegin;
2540   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2541   PetscCall(MatSetRandom(aij->A,rctx));
2542   if (x->assembled) {
2543     PetscCall(MatSetRandom(aij->B,rctx));
2544   } else {
2545     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2546   }
2547   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2548   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2553 {
2554   PetscFunctionBegin;
2555   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2556   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2557   PetscFunctionReturn(0);
2558 }
2559 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if it is registered; a no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscBool            sc = PETSC_FALSE,flg;
2582 
2583   PetscFunctionBegin;
2584   PetscCall(PetscOptionsHead(PetscOptionsObject,"MPIAIJ options"));
2585   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2586   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2587   if (flg) {
2588     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2589   }
2590   PetscCall(PetscOptionsTail());
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2595 {
2596   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2597   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2598 
2599   PetscFunctionBegin;
2600   if (!Y->preallocated) {
2601     PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
2602   } else if (!aij->nz) {
2603     PetscInt nonew = aij->nonew;
2604     PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
2605     aij->nonew = nonew;
2606   }
2607   PetscCall(MatShift_Basic(Y,a));
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2612 {
2613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2614 
2615   PetscFunctionBegin;
2616   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2617   PetscCall(MatMissingDiagonal(a->A,missing,d));
2618   if (d) {
2619     PetscInt rstart;
2620     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2621     *d += rstart;
2622 
2623   }
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2628 {
2629   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2630 
2631   PetscFunctionBegin;
2632   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ. The initializer is positional: the
   inline slot-number comments index entries of struct _MatOps, and a NULL slot
   means the operation is unimplemented for this type (falling back to whatever
   default handling MatXXX() provides). Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2786 
2787 /* ----------------------------------------------------------------------------------------*/
2788 
2789 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2790 {
2791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2792 
2793   PetscFunctionBegin;
2794   PetscCall(MatStoreValues(aij->A));
2795   PetscCall(MatStoreValues(aij->B));
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2800 {
2801   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2802 
2803   PetscFunctionBegin;
2804   PetscCall(MatRetrieveValues(aij->A));
2805   PetscCall(MatRetrieveValues(aij->B));
2806   PetscFunctionReturn(0);
2807 }
2808 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - Preallocate storage for the diagonal (A) and
  off-diagonal (B) sequential blocks of a MATMPIAIJ matrix.

  d_nz/d_nnz give nonzero counts (uniform / per-row) for the diagonal block,
  o_nz/o_nnz for the off-diagonal block; interpretation of the pairs is handled
  by MatSeqAIJSetPreallocation().
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the existing global-to-local off-diagonal column mapping; it is
     rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    /* first preallocation: the diagonal block must be created as well */
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2852 
/*
  MatResetPreallocation_MPIAIJ - Return both sequential blocks to their freshly
  preallocated (empty) state, discarding assembled data and the communication
  infrastructure built during assembly.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* colmap/garray/lvec/Mvctx are assembly artifacts; they are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2879 
/*
  MatDuplicate_MPIAIJ - Create a new MATMPIAIJ matrix with the same layout as
  matin; cpvalues controls whether the numerical values are copied (handled by
  the MatDuplicate() calls on the two blocks).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  /* copy top-level state flags */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch is not copied; it is allocated on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local off-diagonal column mapping, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the list of global column numbers of the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2945 
2946 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2947 {
2948   PetscBool      isbinary, ishdf5;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2952   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2953   /* force binary viewer to load .info file if it has not yet done so */
2954   PetscCall(PetscViewerSetUp(viewer));
2955   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2956   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2957   if (isbinary) {
2958     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2959   } else if (ishdf5) {
2960 #if defined(PETSC_HAVE_HDF5)
2961     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2962 #else
2963     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2964 #endif
2965   } else {
2966     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2967   }
2968   PetscFunctionReturn(0);
2969 }
2970 
/* Load an MPIAIJ matrix from a PETSc binary viewer.
   File layout read here: a 4-int header [classid, M, N, nz], then M row lengths,
   then nz column indices, then nz scalar values; each rank reads the portion
   matching the matrix's row layout and assembles via MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz flags a special on-disk storage format this loader cannot handle */
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the per-row counts into CSR row offsets (rowidxs[m] = local nnz) */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* consistency check: row lengths summed over all ranks must equal nz from the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3017 
3018 /* Not scalable because of ISAllGather() unless getting all columns. */
3019 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3020 {
3021   IS             iscol_local;
3022   PetscBool      isstride;
3023   PetscMPIInt    lisstride=0,gisstride;
3024 
3025   PetscFunctionBegin;
3026   /* check if we are grabbing all columns*/
3027   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3028 
3029   if (isstride) {
3030     PetscInt  start,len,mstart,mlen;
3031     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3032     PetscCall(ISGetLocalSize(iscol,&len));
3033     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3034     if (mstart == start && mlen-mstart == len) lisstride = 1;
3035   }
3036 
3037   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3038   if (gisstride) {
3039     PetscInt N;
3040     PetscCall(MatGetSize(mat,NULL,&N));
3041     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3042     PetscCall(ISSetIdentity(iscol_local));
3043     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3044   } else {
3045     PetscInt cbs;
3046     PetscCall(ISGetBlockSize(iscol,&cbs));
3047     PetscCall(ISAllGather(iscol,&iscol_local));
3048     PetscCall(ISSetBlockSize(iscol_local,cbs));
3049   }
3050 
3051   *isseq = iscol_local;
3052   PetscFunctionReturn(0);
3053 }
3054 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            the caller takes ownership and must free it with PetscFree()
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* -1 acts as a sentinel marking columns NOT selected by iscol */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* exclusive prefix sum of local sizes = this rank's offset into iscol's global numbering */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); /* idx is now owned by *iscol_d */
  PetscCall(ISGetBlockSize(iscol,&i));  /* i is reused here to hold the block size */
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));      /* fresh buffer; the previous idx belongs to *iscol_d */
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* entries still at the -1 sentinel were not selected by iscol */
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3166 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat.
   On MAT_INITIAL_MATRIX the index sets isrow_d/iscol_d/iscol_o are composed on *submat so a later
   MAT_REUSE_MATRIX call can update the diagonal/off-diagonal blocks in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) { /* only update B when there are off-diagonal columns to extract */
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; Asub and Bsub become part of M and must not be touched afterwards */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* walk the two sorted column maps in lockstep, keeping only the iscol_o
         entries whose global column survived the condensation */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray)); /* ownership was transferred to us by ISGetSeqIS_SameColDist_Private() */
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3261 
/* Front end for submatrix extraction: selects a specialized algorithm when isrow
   (and possibly iscol) have the same processor distribution as mat, otherwise
   falls back to the nonscalable ISAllGather()-based path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the objects composed on *newmat by the initial call identify which algorithm
       created it, and hence which one must perform the update */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices lie inside mat's local row range iff the min and max do */
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the specialized paths may only be taken when ALL ranks qualify (logical AND) */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path, which reuses it below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3365 
/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;
  MatType           mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
  PetscCall(MatSetType(*mat,mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* the diagonal/off-diagonal blocks are installed directly below, so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's compact local column indices to global indices, in place, via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's i/j/a arrays without copying */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3458 
3459 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3460 
3461 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3462 {
3463   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3464   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3465   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3466   Mat            M,Msub,B=a->B;
3467   MatScalar      *aa;
3468   Mat_SeqAIJ     *aij;
3469   PetscInt       *garray = a->garray,*colsub,Ncols;
3470   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3471   IS             iscol_sub,iscmap;
3472   const PetscInt *is_idx,*cmap;
3473   PetscBool      allcolumns=PETSC_FALSE;
3474   MPI_Comm       comm;
3475 
3476   PetscFunctionBegin;
3477   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3478   if (call == MAT_REUSE_MATRIX) {
3479     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3480     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3481     PetscCall(ISGetLocalSize(iscol_sub,&count));
3482 
3483     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3484     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3485 
3486     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3487     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3488 
3489     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3490 
3491   } else { /* call == MAT_INITIAL_MATRIX) */
3492     PetscBool flg;
3493 
3494     PetscCall(ISGetLocalSize(iscol,&n));
3495     PetscCall(ISGetSize(iscol,&Ncols));
3496 
3497     /* (1) iscol -> nonscalable iscol_local */
3498     /* Check for special case: each processor gets entire matrix columns */
3499     PetscCall(ISIdentity(iscol_local,&flg));
3500     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3501     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3502     if (allcolumns) {
3503       iscol_sub = iscol_local;
3504       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3505       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3506 
3507     } else {
3508       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3509       PetscInt *idx,*cmap1,k;
3510       PetscCall(PetscMalloc1(Ncols,&idx));
3511       PetscCall(PetscMalloc1(Ncols,&cmap1));
3512       PetscCall(ISGetIndices(iscol_local,&is_idx));
3513       count = 0;
3514       k     = 0;
3515       for (i=0; i<Ncols; i++) {
3516         j = is_idx[i];
3517         if (j >= cstart && j < cend) {
3518           /* diagonal part of mat */
3519           idx[count]     = j;
3520           cmap1[count++] = i; /* column index in submat */
3521         } else if (Bn) {
3522           /* off-diagonal part of mat */
3523           if (j == garray[k]) {
3524             idx[count]     = j;
3525             cmap1[count++] = i;  /* column index in submat */
3526           } else if (j > garray[k]) {
3527             while (j > garray[k] && k < Bn-1) k++;
3528             if (j == garray[k]) {
3529               idx[count]     = j;
3530               cmap1[count++] = i; /* column index in submat */
3531             }
3532           }
3533         }
3534       }
3535       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3536 
3537       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3538       PetscCall(ISGetBlockSize(iscol,&cbs));
3539       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3540 
3541       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3542     }
3543 
3544     /* (3) Create sequential Msub */
3545     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3546   }
3547 
3548   PetscCall(ISGetLocalSize(iscol_sub,&count));
3549   aij  = (Mat_SeqAIJ*)(Msub)->data;
3550   ii   = aij->i;
3551   PetscCall(ISGetIndices(iscmap,&cmap));
3552 
3553   /*
3554       m - number of local rows
3555       Ncols - number of columns (same on all processors)
3556       rstart - first row in new global matrix generated
3557   */
3558   PetscCall(MatGetSize(Msub,&m,NULL));
3559 
3560   if (call == MAT_INITIAL_MATRIX) {
3561     /* (4) Create parallel newmat */
3562     PetscMPIInt    rank,size;
3563     PetscInt       csize;
3564 
3565     PetscCallMPI(MPI_Comm_size(comm,&size));
3566     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3567 
3568     /*
3569         Determine the number of non-zeros in the diagonal and off-diagonal
3570         portions of the matrix in order to do correct preallocation
3571     */
3572 
3573     /* first get start and end of "diagonal" columns */
3574     PetscCall(ISGetLocalSize(iscol,&csize));
3575     if (csize == PETSC_DECIDE) {
3576       PetscCall(ISGetSize(isrow,&mglobal));
3577       if (mglobal == Ncols) { /* square matrix */
3578         nlocal = m;
3579       } else {
3580         nlocal = Ncols/size + ((Ncols % size) > rank);
3581       }
3582     } else {
3583       nlocal = csize;
3584     }
3585     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3586     rstart = rend - nlocal;
3587     PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3588 
3589     /* next, compute all the lengths */
3590     jj    = aij->j;
3591     PetscCall(PetscMalloc1(2*m+1,&dlens));
3592     olens = dlens + m;
3593     for (i=0; i<m; i++) {
3594       jend = ii[i+1] - ii[i];
3595       olen = 0;
3596       dlen = 0;
3597       for (j=0; j<jend; j++) {
3598         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3599         else dlen++;
3600         jj++;
3601       }
3602       olens[i] = olen;
3603       dlens[i] = dlen;
3604     }
3605 
3606     PetscCall(ISGetBlockSize(isrow,&bs));
3607     PetscCall(ISGetBlockSize(iscol,&cbs));
3608 
3609     PetscCall(MatCreate(comm,&M));
3610     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3611     PetscCall(MatSetBlockSizes(M,bs,cbs));
3612     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3613     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3614     PetscCall(PetscFree(dlens));
3615 
3616   } else { /* call == MAT_REUSE_MATRIX */
3617     M    = *newmat;
3618     PetscCall(MatGetLocalSize(M,&i,NULL));
3619     PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3620     PetscCall(MatZeroEntries(M));
3621     /*
3622          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3623        rather than the slower MatSetValues().
3624     */
3625     M->was_assembled = PETSC_TRUE;
3626     M->assembled     = PETSC_FALSE;
3627   }
3628 
3629   /* (5) Set values of Msub to *newmat */
3630   PetscCall(PetscMalloc1(count,&colsub));
3631   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3632 
3633   jj   = aij->j;
3634   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3635   for (i=0; i<m; i++) {
3636     row = rstart + i;
3637     nz  = ii[i+1] - ii[i];
3638     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3639     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3640     jj += nz; aa += nz;
3641   }
3642   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3643   PetscCall(ISRestoreIndices(iscmap,&cmap));
3644 
3645   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3646   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3647 
3648   PetscCall(PetscFree(colsub));
3649 
3650   /* save Msub, iscol_sub and iscmap used in processor for next request */
3651   if (call == MAT_INITIAL_MATRIX) {
3652     *newmat = M;
3653     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3654     PetscCall(MatDestroy(&Msub));
3655 
3656     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3657     PetscCall(ISDestroy(&iscol_sub));
3658 
3659     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3660     PetscCall(ISDestroy(&iscmap));
3661 
3662     if (iscol_local) {
3663       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3664       PetscCall(ISDestroy(&iscol_local));
3665     }
3666   }
3667   PetscFunctionReturn(0);
3668 }
3669 
3670 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  matrix locally, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3674 
3675   Note: This requires a sequential iscol with all indices.
3676 */
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts the parallel submatrix mat[isrow,iscol]
   by first gathering each process's requested rows into a local SeqAIJ matrix (Mreuse)
   and then inserting those rows into a new distributed matrix M; see the header comment
   above for why this is considered nonscalable.

   csize is the number of local columns of the result (PETSC_DECIDE allowed);
   with MAT_REUSE_MATRIX, *newmat must come from a previous MAT_INITIAL_MATRIX call,
   which cached the local piece on it under the key "SubMatrix".
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns fast path in the local extraction is only valid if it applies on every rank */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the cached local piece from the previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread n columns as evenly as possible, giving the first n%size ranks one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this rank's column ownership [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    /* split each row's nonzeros into diagonal-block ([rstart,rend)) and off-diagonal counts;
       dlens and olens share one allocation (olens = dlens + m) freed together below */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert the local rows of Mreuse into M; cwork/vwork walk the CSR arrays row by row */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the start of the array by the loop above;
     this presumably relies on MatSeqAIJRestoreArrayRead() ignoring the pointer value --
     confirm, or restore with the original pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3801 
3802 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3803 {
3804   PetscInt       m,cstart, cend,j,nnz,i,d;
3805   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3806   const PetscInt *JJ;
3807   PetscBool      nooffprocentries;
3808 
3809   PetscFunctionBegin;
3810   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3811 
3812   PetscCall(PetscLayoutSetUp(B->rmap));
3813   PetscCall(PetscLayoutSetUp(B->cmap));
3814   m      = B->rmap->n;
3815   cstart = B->cmap->rstart;
3816   cend   = B->cmap->rend;
3817   rstart = B->rmap->rstart;
3818 
3819   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3820 
3821   if (PetscDefined(USE_DEBUG)) {
3822     for (i=0; i<m; i++) {
3823       nnz = Ii[i+1]- Ii[i];
3824       JJ  = J + Ii[i];
3825       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3826       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3827       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3828     }
3829   }
3830 
3831   for (i=0; i<m; i++) {
3832     nnz     = Ii[i+1]- Ii[i];
3833     JJ      = J + Ii[i];
3834     nnz_max = PetscMax(nnz_max,nnz);
3835     d       = 0;
3836     for (j=0; j<nnz; j++) {
3837       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3838     }
3839     d_nnz[i] = d;
3840     o_nnz[i] = nnz - d;
3841   }
3842   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3843   PetscCall(PetscFree2(d_nnz,o_nnz));
3844 
3845   for (i=0; i<m; i++) {
3846     ii   = i + rstart;
3847     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3848   }
3849   nooffprocentries    = B->nooffprocentries;
3850   B->nooffprocentries = PETSC_TRUE;
3851   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3852   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3853   B->nooffprocentries = nooffprocentries;
3854 
3855   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 /*@
3860    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).
3862 
3863    Collective
3864 
3865    Input Parameters:
3866 +  B - the matrix
3867 .  i - the indices into j for the start of each local row (starts with zero)
3868 .  j - the column indices for each local row (starts with zero)
3869 -  v - optional values in the matrix
3870 
3871    Level: developer
3872 
3873    Notes:
3874        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3875      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3876      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3877 
3878        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3879 
3880        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
3882     as shown
3883 
3884 $        1 0 0
3885 $        2 0 3     P0
3886 $       -------
3887 $        4 5 6     P1
3888 $
3889 $     Process0 [P0]: rows_owned=[0,1]
3890 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3891 $        j =  {0,0,2}  [size = 3]
3892 $        v =  {1,2,3}  [size = 3]
3893 $
3894 $     Process1 [P1]: rows_owned=[2]
3895 $        i =  {0,3}    [size = nrow+1  = 1+1]
3896 $        j =  {0,1,2}  [size = 3]
3897 $        v =  {4,5,6}  [size = 3]
3898 
3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3900           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3901 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered under "MatMPIAIJSetPreallocationCSR_C"
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod is a no-op if the type
     does not provide the method */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3908 
3909 /*@C
3910    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3911    (the default parallel PETSc format).  For good matrix assembly performance
3912    the user should preallocate the matrix storage by setting the parameters
3913    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3914    performance can be increased by more than a factor of 50.
3915 
3916    Collective
3917 
3918    Input Parameters:
3919 +  B - the matrix
3920 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3921            (same value is used for all local rows)
3922 .  d_nnz - array containing the number of nonzeros in the various rows of the
3923            DIAGONAL portion of the local submatrix (possibly different for each row)
3924            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3925            The size of this array is equal to the number of local rows, i.e 'm'.
3926            For matrices that will be factored, you must leave room for (and set)
3927            the diagonal entry even if it is zero.
3928 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3929            submatrix (same value is used for all local rows).
3930 -  o_nnz - array containing the number of nonzeros in the various rows of the
3931            OFF-DIAGONAL portion of the local submatrix (possibly different for
3932            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3933            structure. The size of this array is equal to the number
3934            of local rows, i.e 'm'.
3935 
3936    If the *_nnz parameter is given then the *_nz parameter is ignored
3937 
3938    The AIJ format (also called the Yale sparse matrix format or
3939    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3940    storage.  The stored row and column indices begin with zero.
3941    See Users-Manual: ch_mat for details.
3942 
3943    The parallel matrix is partitioned such that the first m0 rows belong to
3944    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3945    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3946 
3947    The DIAGONAL portion of the local submatrix of a processor can be defined
3948    as the submatrix which is obtained by extraction the part corresponding to
3949    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3950    first row that belongs to the processor, r2 is the last row belonging to
3951    the this processor, and c1-c2 is range of indices of the local part of a
3952    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3953    common case of a square matrix, the row and column ranges are the same and
3954    the DIAGONAL part is also square. The remaining portion of the local
3955    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3956 
3957    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3958 
3959    You can call MatGetInfo() to get information on how effective the preallocation was;
3960    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3961    You can also run with the option -info and look for messages with the string
3962    malloc in them to see if additional memory allocation was needed.
3963 
3964    Example usage:
3965 
3966    Consider the following 8x8 matrix with 34 non-zero values, that is
3967    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3968    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3969    as follows:
3970 
3971 .vb
3972             1  2  0  |  0  3  0  |  0  4
3973     Proc0   0  5  6  |  7  0  0  |  8  0
3974             9  0 10  | 11  0  0  | 12  0
3975     -------------------------------------
3976            13  0 14  | 15 16 17  |  0  0
3977     Proc1   0 18  0  | 19 20 21  |  0  0
3978             0  0  0  | 22 23  0  | 24  0
3979     -------------------------------------
3980     Proc2  25 26 27  |  0  0 28  | 29  0
3981            30  0  0  | 31 32 33  |  0 34
3982 .ve
3983 
3984    This can be represented as a collection of submatrices as:
3985 
3986 .vb
3987       A B C
3988       D E F
3989       G H I
3990 .ve
3991 
3992    Where the submatrices A,B,C are owned by proc0, D,E,F are
3993    owned by proc1, G,H,I are owned by proc2.
3994 
3995    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3996    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3997    The 'M','N' parameters are 8,8, and have the same values on all procs.
3998 
3999    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4000    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4001    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4002    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4005 
4006    When d_nz, o_nz parameters are specified, d_nz storage elements are
4007    allocated for every row of the local diagonal submatrix, and o_nz
4008    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4010    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4011    In this case, the values of d_nz,o_nz are:
4012 .vb
4013      proc0 : dnz = 2, o_nz = 2
4014      proc1 : dnz = 3, o_nz = 2
4015      proc2 : dnz = 1, o_nz = 4
4016 .ve
4017    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4018    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4020    34 values.
4021 
4022    When d_nnz, o_nnz parameters are specified, the storage is specified
4023    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4024    In the above case the values for d_nnz,o_nnz are:
4025 .vb
4026      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4027      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4028      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4029 .ve
4030    Here the space allocated is sum of all the above values i.e 34, and
4031    hence pre-allocation is perfect.
4032 
4033    Level: intermediate
4034 
4035 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4036           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4037 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate B before dispatch; PetscValidType requires MatSetType() to have been called */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation registered under
     "MatMPIAIJSetPreallocation_C"; a no-op if the type does not provide the method */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4046 
4047 /*@
4048      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4049          CSR format for the local rows.
4050 
4051    Collective
4052 
4053    Input Parameters:
4054 +  comm - MPI communicator
4055 .  m - number of local rows (Cannot be PETSC_DECIDE)
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4058        calculated if N is given) For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4061 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4062 .   j - column indices
4063 -   a - matrix values
4064 
4065    Output Parameter:
4066 .   mat - the matrix
4067 
4068    Level: intermediate
4069 
4070    Notes:
4071        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4072      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4073      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4074 
4075        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4076 
4077        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
4079     as shown
4080 
4081        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4082 
4083 $        1 0 0
4084 $        2 0 3     P0
4085 $       -------
4086 $        4 5 6     P1
4087 $
4088 $     Process0 [P0]: rows_owned=[0,1]
4089 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4090 $        j =  {0,0,2}  [size = 3]
4091 $        v =  {1,2,3}  [size = 3]
4092 $
4093 $     Process1 [P1]: rows_owned=[2]
4094 $        i =  {0,3}    [size = nrow+1  = 1+1]
4095 $        j =  {0,1,2}  [size = 3]
4096 $        v =  {4,5,6}  [size = 3]
4097 
4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4099           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4100 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* copies i, j, a into the matrix, preallocates, and assembles; see
     MatMPIAIJSetPreallocationCSR() for the expected CSR layout */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4113 
4114 /*@
4115      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
         CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical
4117 
4118    Collective
4119 
4120    Input Parameters:
4121 +  mat - the matrix
4122 .  m - number of local rows (Cannot be PETSC_DECIDE)
4123 .  n - This value should be the same as the local size used in creating the
4124        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4125        calculated if N is given) For square matrices n is almost always m.
4126 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4127 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4128 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4129 .  J - column indices
4130 -  v - matrix values
4131 
4132    Level: intermediate
4133 
4134 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4135           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4136 @*/
4137 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4138 {
4139   PetscInt       cstart,nnz,i,j;
4140   PetscInt       *ld;
4141   PetscBool      nooffprocentries;
4142   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4143   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4144   PetscScalar    *ad,*ao;
4145   const PetscInt *Adi = Ad->i;
4146   PetscInt       ldi,Iii,md;
4147 
4148   PetscFunctionBegin;
4149   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4150   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4151   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4152   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4153 
4154   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4155   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4156   cstart = mat->cmap->rstart;
4157   if (!Aij->ld) {
4158     /* count number of entries below block diagonal */
4159     PetscCall(PetscCalloc1(m,&ld));
4160     Aij->ld = ld;
4161     for (i=0; i<m; i++) {
4162       nnz  = Ii[i+1]- Ii[i];
4163       j     = 0;
4164       while  (J[j] < cstart && j < nnz) {j++;}
4165       J    += nnz;
4166       ld[i] = j;
4167     }
4168   } else {
4169     ld = Aij->ld;
4170   }
4171 
4172   for (i=0; i<m; i++) {
4173     nnz  = Ii[i+1]- Ii[i];
4174     Iii  = Ii[i];
4175     ldi  = ld[i];
4176     md   = Adi[i+1]-Adi[i];
4177     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4178     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4179     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4180     ad  += md;
4181     ao  += nnz - md;
4182   }
4183   nooffprocentries      = mat->nooffprocentries;
4184   mat->nooffprocentries = PETSC_TRUE;
4185   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4186   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4187   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4188   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4189   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4190   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4191   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4192   mat->nooffprocentries = nooffprocentries;
4193   PetscFunctionReturn(0);
4194 }
4195 
4196 /*@C
4197    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4198    (the default parallel PETSc format).  For good matrix assembly performance
4199    the user should preallocate the matrix storage by setting the parameters
4200    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4201    performance can be increased by more than a factor of 50.
4202 
4203    Collective
4204 
4205    Input Parameters:
4206 +  comm - MPI communicator
4207 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4208            This value should be the same as the local size used in creating the
4209            y vector for the matrix-vector product y = Ax.
4210 .  n - This value should be the same as the local size used in creating the
4211        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4212        calculated if N is given) For square matrices n is almost always m.
4213 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4214 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4215 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4216            (same value is used for all local rows)
4217 .  d_nnz - array containing the number of nonzeros in the various rows of the
4218            DIAGONAL portion of the local submatrix (possibly different for each row)
4219            or NULL, if d_nz is used to specify the nonzero structure.
4220            The size of this array is equal to the number of local rows, i.e 'm'.
4221 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4222            submatrix (same value is used for all local rows).
4223 -  o_nnz - array containing the number of nonzeros in the various rows of the
4224            OFF-DIAGONAL portion of the local submatrix (possibly different for
4225            each row) or NULL, if o_nz is used to specify the nonzero
4226            structure. The size of this array is equal to the number
4227            of local rows, i.e 'm'.
4228 
4229    Output Parameter:
4230 .  A - the matrix
4231 
4232    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4233    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4234    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4235 
4236    Notes:
4237    If the *_nnz parameter is given then the *_nz parameter is ignored
4238 
4239    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4240    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4241    storage requirements for this matrix.
4242 
4243    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4244    processor than it must be used on all processors that share the object for
4245    that argument.
4246 
4247    The user MUST specify either the local or global matrix dimensions
4248    (possibly both).
4249 
4250    The parallel matrix is partitioned across processors such that the
4251    first m0 rows belong to process 0, the next m1 rows belong to
4252    process 1, the next m2 rows belong to process 2 etc.. where
4253    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4254    values corresponding to [m x N] submatrix.
4255 
4256    The columns are logically partitioned with the n0 columns belonging
4257    to 0th partition, the next n1 columns belonging to the next
4258    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4259 
4260    The DIAGONAL portion of the local submatrix on any given processor
4261    is the submatrix corresponding to the rows and columns m,n
4262    corresponding to the given processor. i.e diagonal matrix on
4263    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4264    etc. The remaining portion of the local submatrix [m x (N-n)]
4265    constitute the OFF-DIAGONAL portion. The example below better
4266    illustrates this concept.
4267 
4268    For a square global matrix we define each processor's diagonal portion
4269    to be its local rows and the corresponding columns (a square submatrix);
4270    each processor's off-diagonal portion encompasses the remainder of the
4271    local matrix (a rectangular submatrix).
4272 
4273    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4274 
4275    When calling this routine with a single process communicator, a matrix of
4276    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4277    type of communicator, use the construction mechanism
4278 .vb
4279      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4280 .ve
4281 
4282 $     MatCreate(...,&A);
4283 $     MatSetType(A,MATMPIAIJ);
4284 $     MatSetSizes(A, m,n,M,N);
4285 $     MatMPIAIJSetPreallocation(A,...);
4286 
4287    By default, this format uses inodes (identical nodes) when possible.
4288    We search for consecutive rows with the same nonzero structure, thereby
4289    reusing matrix information to achieve increased efficiency.
4290 
4291    Options Database Keys:
4292 +  -mat_no_inode  - Do not use inodes
4293 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4294 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4295         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4296         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4297 
4298    Example usage:
4299 
4300    Consider the following 8x8 matrix with 34 non-zero values, that is
4301    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4302    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4303    as follows
4304 
4305 .vb
4306             1  2  0  |  0  3  0  |  0  4
4307     Proc0   0  5  6  |  7  0  0  |  8  0
4308             9  0 10  | 11  0  0  | 12  0
4309     -------------------------------------
4310            13  0 14  | 15 16 17  |  0  0
4311     Proc1   0 18  0  | 19 20 21  |  0  0
4312             0  0  0  | 22 23  0  | 24  0
4313     -------------------------------------
4314     Proc2  25 26 27  |  0  0 28  | 29  0
4315            30  0  0  | 31 32 33  |  0 34
4316 .ve
4317 
4318    This can be represented as a collection of submatrices as
4319 
4320 .vb
4321       A B C
4322       D E F
4323       G H I
4324 .ve
4325 
4326    Where the submatrices A,B,C are owned by proc0, D,E,F are
4327    owned by proc1, G,H,I are owned by proc2.
4328 
4329    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4330    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4331    The 'M','N' parameters are 8,8, and have the same values on all procs.
4332 
4333    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4334    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4335    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4336    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4337    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4339 
4340    When d_nz, o_nz parameters are specified, d_nz storage elements are
4341    allocated for every row of the local diagonal submatrix, and o_nz
4342    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4344    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4345    In this case, the values of d_nz,o_nz are
4346 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4350 .ve
4351    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4352    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4354    34 values.
4355 
4356    When d_nnz, o_nnz parameters are specified, the storage is specified
4357    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4358    In the above case the values for d_nnz,o_nnz are
4359 .vb
4360      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4361      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4362      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4363 .ve
4364    Here the space allocated is sum of all the above values i.e 34, and
4365    hence pre-allocation is perfect.
4366 
4367    Level: intermediate
4368 
4369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4370           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4371 @*/
4372 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4373 {
4374   PetscMPIInt    size;
4375 
4376   PetscFunctionBegin;
4377   PetscCall(MatCreate(comm,A));
4378   PetscCall(MatSetSizes(*A,m,n,M,N));
4379   PetscCallMPI(MPI_Comm_size(comm,&size));
4380   if (size > 1) {
4381     PetscCall(MatSetType(*A,MATMPIAIJ));
4382     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4383   } else {
4384     PetscCall(MatSetType(*A,MATSEQAIJ));
4385     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4386   }
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 /*@C
4391   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4392 
4393   Not collective
4394 
4395   Input Parameter:
4396 . A - The MPIAIJ matrix
4397 
4398   Output Parameters:
4399 + Ad - The local diagonal block as a SeqAIJ matrix
4400 . Ao - The local off-diagonal block as a SeqAIJ matrix
4401 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4402 
4403   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4405   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4406   local column numbers to global column numbers in the original matrix.
4407 
4408   Level: intermediate
4409 
4410 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4411 @*/
4412 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4413 {
4414   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4415   PetscBool      flg;
4416 
4417   PetscFunctionBegin;
4418   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4419   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4420   if (Ad)     *Ad     = a->A;
4421   if (Ao)     *Ao     = a->B;
4422   if (colmap) *colmap = a->garray;
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4427 {
4428   PetscErrorCode ierr;
4429   PetscInt       m,N,i,rstart,nnz,Ii;
4430   PetscInt       *indx;
4431   PetscScalar    *values;
4432   MatType        rootType;
4433 
4434   PetscFunctionBegin;
4435   PetscCall(MatGetSize(inmat,&m,&N));
4436   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4437     PetscInt       *dnz,*onz,sum,bs,cbs;
4438 
4439     if (n == PETSC_DECIDE) {
4440       PetscCall(PetscSplitOwnership(comm,&n,&N));
4441     }
4442     /* Check sum(n) = N */
4443     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
4444     PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4445 
4446     PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
4447     rstart -= m;
4448 
4449     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
4450     for (i=0; i<m; i++) {
4451       PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4452       PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
4453       PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
4454     }
4455 
4456     PetscCall(MatCreate(comm,outmat));
4457     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
4458     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
4459     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
4460     PetscCall(MatGetRootType_Private(inmat,&rootType));
4461     PetscCall(MatSetType(*outmat,rootType));
4462     PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
4463     PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
4464     ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
4465     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
4466   }
4467 
4468   /* numeric phase */
4469   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
4470   for (i=0; i<m; i++) {
4471     PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4472     Ii   = i + rstart;
4473     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
4474     PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
4475   }
4476   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
4477   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
4478   PetscFunctionReturn(0);
4479 }
4480 
4481 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4482 {
4483   PetscMPIInt       rank;
4484   PetscInt          m,N,i,rstart,nnz;
4485   size_t            len;
4486   const PetscInt    *indx;
4487   PetscViewer       out;
4488   char              *name;
4489   Mat               B;
4490   const PetscScalar *values;
4491 
4492   PetscFunctionBegin;
4493   PetscCall(MatGetLocalSize(A,&m,NULL));
4494   PetscCall(MatGetSize(A,NULL,&N));
4495   /* Should this be the type of the diagonal block of A? */
4496   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4497   PetscCall(MatSetSizes(B,m,N,m,N));
4498   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4499   PetscCall(MatSetType(B,MATSEQAIJ));
4500   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4501   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4502   for (i=0; i<m; i++) {
4503     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4504     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4505     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4506   }
4507   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4508   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4509 
4510   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4511   PetscCall(PetscStrlen(outfile,&len));
4512   PetscCall(PetscMalloc1(len+6,&name));
4513   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4514   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4515   PetscCall(PetscFree(name));
4516   PetscCall(MatView(B,out));
4517   PetscCall(PetscViewerDestroy(&out));
4518   PetscCall(MatDestroy(&B));
4519   PetscFunctionReturn(0);
4520 }
4521 
4522 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4523 {
4524   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4525 
4526   PetscFunctionBegin;
4527   if (!merge) PetscFunctionReturn(0);
4528   PetscCall(PetscFree(merge->id_r));
4529   PetscCall(PetscFree(merge->len_s));
4530   PetscCall(PetscFree(merge->len_r));
4531   PetscCall(PetscFree(merge->bi));
4532   PetscCall(PetscFree(merge->bj));
4533   PetscCall(PetscFree(merge->buf_ri[0]));
4534   PetscCall(PetscFree(merge->buf_ri));
4535   PetscCall(PetscFree(merge->buf_rj[0]));
4536   PetscCall(PetscFree(merge->buf_rj));
4537   PetscCall(PetscFree(merge->coi));
4538   PetscCall(PetscFree(merge->coj));
4539   PetscCall(PetscFree(merge->owners_co));
4540   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4541   PetscCall(PetscFree(merge));
4542   PetscFunctionReturn(0);
4543 }
4544 
4545 #include <../src/mat/utils/freespace.h>
4546 #include <petscbt.h>
4547 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the values of
  mpimat (whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) by summing, row by row,
  the entries of every rank's seqmat.

  The communication pattern (message counts/lengths, merged row structure bi/bj, received
  i-structures) comes from the Mat_Merge_SeqsToMPI struct that the symbolic phase attached to
  mpimat in a PetscContainer; only the scalar values are exchanged here.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge info that the symbolic phase stashed on mpimat */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of the rows owned by [proc] are contiguous in the CSR value array */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* single pass merge: bj_i is a superset of aj for this row, both sorted */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4667 
/*
  MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the
  nonzero structure of the parallel matrix equal to the sum of the seqmat's from all ranks,
  exchanging i/j (row/column) structure only — no scalar values.

  The resulting B_mpi is created and preallocated but deliberately left unassembled; the
  communication pattern and merged row structure are stored in a Mat_Merge_SeqsToMPI struct
  attached to B_mpi via a PetscContainer, for reuse by MatCreateMPIAIJSumSeqAIJNumeric().
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows of seqmat owned by [proc] */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of the rows owned by [proc] are contiguous in the CSR j array */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) { /* empty rows are not sent */
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  if (n==PETSC_DECIDE) {
    /* local column count undecided: fix the global column count to seqmat's N */
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4918 
4919 /*@C
4920       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4921                  matrices from each processor
4922 
4923     Collective
4924 
4925    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4928 .    m - number of local rows (or PETSC_DECIDE)
4929 .    n - number of local columns (or PETSC_DECIDE)
4930 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4931 
4932    Output Parameter:
4933 .    mpimat - the parallel matrix generated
4934 
4935     Level: advanced
4936 
4937    Notes:
4938      The dimensions of the sequential matrix in each processor MUST be the same.
4939      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4940      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4941 @*/
4942 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4943 {
4944   PetscMPIInt    size;
4945 
4946   PetscFunctionBegin;
4947   PetscCallMPI(MPI_Comm_size(comm,&size));
4948   if (size == 1) {
4949     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4950     if (scall == MAT_INITIAL_MATRIX) {
4951       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4952     } else {
4953       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4954     }
4955     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4956     PetscFunctionReturn(0);
4957   }
4958   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4959   if (scall == MAT_INITIAL_MATRIX) {
4960     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4961   }
4962   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4963   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 /*@
4968      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4970           with MatGetSize()
4971 
4972     Not Collective
4973 
4974    Input Parameters:
4975 +    A - the matrix
4976 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4977 
4978    Output Parameter:
4979 .    A_loc - the local sequential matrix generated
4980 
4981     Level: developer
4982 
4983    Notes:
4984      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4985      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4986      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4987      modify the values of the returned A_loc.
4988 
4989 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4990 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its subclasses (type names share the "mpiaij" prefix) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* uniprocess: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  /* CSR arrays of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the local matrix holds the diagonal and off-diagonal entries of row i */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* per row, interleave in ascending global column order:
       off-diagonal cols < cstart, then diagonal cols, then off-diagonal cols >= cstart */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj]; /* map Ao's local column to a global column */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: refresh only the values, in the same interleaved order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5091 
5092 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5095 
5096     Not Collective
5097 
5098    Input Parameters:
5099 +    A - the matrix
5100 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5101 
5102    Output Parameters:
5103 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5104 -    A_loc - the local sequential matrix generated
5105 
5106     Level: developer
5107 
5108    Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5110 
5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5112 
5113 @*/
5114 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5115 {
5116   Mat            Ao,Ad;
5117   const PetscInt *cmap;
5118   PetscMPIInt    size;
5119   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5120 
5121   PetscFunctionBegin;
5122   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5123   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5124   if (size == 1) {
5125     if (scall == MAT_INITIAL_MATRIX) {
5126       PetscCall(PetscObjectReference((PetscObject)Ad));
5127       *A_loc = Ad;
5128     } else if (scall == MAT_REUSE_MATRIX) {
5129       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5130     }
5131     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5132     PetscFunctionReturn(0);
5133   }
5134   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5135   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5136   if (f) {
5137     PetscCall((*f)(A,scall,glob,A_loc));
5138   } else {
5139     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5140     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5141     Mat_SeqAIJ        *c;
5142     PetscInt          *ai = a->i, *aj = a->j;
5143     PetscInt          *bi = b->i, *bj = b->j;
5144     PetscInt          *ci,*cj;
5145     const PetscScalar *aa,*ba;
5146     PetscScalar       *ca;
5147     PetscInt          i,j,am,dn,on;
5148 
5149     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5150     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5151     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5152     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5153     if (scall == MAT_INITIAL_MATRIX) {
5154       PetscInt k;
5155       PetscCall(PetscMalloc1(1+am,&ci));
5156       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5157       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5158       ci[0] = 0;
5159       for (i=0,k=0; i<am; i++) {
5160         const PetscInt ncols_o = bi[i+1] - bi[i];
5161         const PetscInt ncols_d = ai[i+1] - ai[i];
5162         ci[i+1] = ci[i] + ncols_o + ncols_d;
5163         /* diagonal portion of A */
5164         for (j=0; j<ncols_d; j++,k++) {
5165           cj[k] = *aj++;
5166           ca[k] = *aa++;
5167         }
5168         /* off-diagonal portion of A */
5169         for (j=0; j<ncols_o; j++,k++) {
5170           cj[k] = dn + *bj++;
5171           ca[k] = *ba++;
5172         }
5173       }
5174       /* put together the new matrix */
5175       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5176       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5177       /* Since these are PETSc arrays, change flags to free them as necessary. */
5178       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5179       c->free_a  = PETSC_TRUE;
5180       c->free_ij = PETSC_TRUE;
5181       c->nonew   = 0;
5182       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5183     } else if (scall == MAT_REUSE_MATRIX) {
5184       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5185       for (i=0; i<am; i++) {
5186         const PetscInt ncols_d = ai[i+1] - ai[i];
5187         const PetscInt ncols_o = bi[i+1] - bi[i];
5188         /* diagonal portion of A */
5189         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5190         /* off-diagonal portion of A */
5191         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5192       }
5193       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5194     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5195     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5196     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5197     if (glob) {
5198       PetscInt cst, *gidx;
5199 
5200       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5201       PetscCall(PetscMalloc1(dn+on,&gidx));
5202       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5203       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5204       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5205     }
5206   }
5207   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5208   PetscFunctionReturn(0);
5209 }
5210 
5211 /*@C
5212      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5213 
5214     Not Collective
5215 
5216    Input Parameters:
5217 +    A - the matrix
5218 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5219 -    row, col - index sets of rows and columns to extract (or NULL)
5220 
5221    Output Parameter:
5222 .    A_loc - the local sequential matrix generated
5223 
5224     Level: developer
5225 
5226 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5227 
5228 @*/
5229 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5230 {
5231   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5232   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5233   IS             isrowa,iscola;
5234   Mat            *aloc;
5235   PetscBool      match;
5236 
5237   PetscFunctionBegin;
5238   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5239   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5240   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5241   if (!row) {
5242     start = A->rmap->rstart; end = A->rmap->rend;
5243     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5244   } else {
5245     isrowa = *row;
5246   }
5247   if (!col) {
5248     start = A->cmap->rstart;
5249     cmap  = a->garray;
5250     nzA   = a->A->cmap->n;
5251     nzB   = a->B->cmap->n;
5252     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5253     ncols = 0;
5254     for (i=0; i<nzB; i++) {
5255       if (cmap[i] < start) idx[ncols++] = cmap[i];
5256       else break;
5257     }
5258     imark = i;
5259     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5260     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5261     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5262   } else {
5263     iscola = *col;
5264   }
5265   if (scall != MAT_INITIAL_MATRIX) {
5266     PetscCall(PetscMalloc1(1,&aloc));
5267     aloc[0] = *A_loc;
5268   }
5269   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5270   if (!col) { /* attach global id of condensed columns */
5271     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5272   }
5273   *A_loc = aloc[0];
5274   PetscCall(PetscFree(aloc));
5275   if (!row) {
5276     PetscCall(ISDestroy(&isrowa));
5277   }
5278   if (!col) {
5279     PetscCall(ISDestroy(&iscola));
5280   }
5281   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5282   PetscFunctionReturn(0);
5283 }
5284 
/*
 * Create a sequential AIJ matrix based on row indices; the whole row (all of its columns) is extracted
 * once a row index is matched. A row may be local or remote. The routine is designed to be scalable in
 * memory so that nothing is allocated based on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (rows requested by this rank)
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns there are for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* arrays are interleaved: slot [i*2+0] is the diagonal block, [i*2+1] the off-diagonal block */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute prefix sums so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  /* per-row nonzero counts of the result, plus totals used to size the value SFs below */
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* two SFs: one moves entries of the diagonal block, the other of the off-diagonal block */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ, so ilocal needs to point into its single contiguous value array */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth as leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix; pd->j is mutated in place and restored below */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is also translated in place and mapped back (via the inverse mapping) below */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  /* every translated index must map back; anything dropped means po->j was corrupted */
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later (MAT_REUSE_MATRIX path) */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5458 
5459 /*
5460  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5461  * This supports MPIAIJ and MAIJ
5462  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dividing by dof collapses the dof
       consecutive columns of a MAIJ block onto one key */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted in increasing order, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Same key as the previous entry (garray sorted), so reuse the last assigned id */
        mapping[i] = count-1;
      }
    }
    /* map: off-diagonal column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map keys come out unordered; sort so the rows of P_oth are in global order */
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place: same communication pattern as the initial build */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5536 
5537 /*@C
5538   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5539 
5540   Collective on Mat
5541 
5542   Input Parameters:
5543 + A - the first matrix in mpiaij format
5544 . B - the second matrix in mpiaij format
5545 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5546 
5547   Output Parameters:
5548 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5549 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5550 - B_seq - the sequential matrix generated
5551 
5552   Level: developer
5553 
5554 @*/
5555 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5556 {
5557   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5558   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5559   IS             isrowb,iscolb;
5560   Mat            *bseq=NULL;
5561 
5562   PetscFunctionBegin;
5563   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5564     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5565   }
5566   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5567 
5568   if (scall == MAT_INITIAL_MATRIX) {
5569     start = A->cmap->rstart;
5570     cmap  = a->garray;
5571     nzA   = a->A->cmap->n;
5572     nzB   = a->B->cmap->n;
5573     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5574     ncols = 0;
5575     for (i=0; i<nzB; i++) {  /* row < local row index */
5576       if (cmap[i] < start) idx[ncols++] = cmap[i];
5577       else break;
5578     }
5579     imark = i;
5580     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5581     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5582     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5583     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5584   } else {
5585     PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5586     isrowb  = *rowb; iscolb = *colb;
5587     PetscCall(PetscMalloc1(1,&bseq));
5588     bseq[0] = *B_seq;
5589   }
5590   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5591   *B_seq = bseq[0];
5592   PetscCall(PetscFree(bseq));
5593   if (!rowb) {
5594     PetscCall(ISDestroy(&isrowb));
5595   } else {
5596     *rowb = isrowb;
5597   }
5598   if (!colb) {
5599     PetscCall(ISDestroy(&iscolb));
5600   } else {
5601     *colb = iscolb;
5602   }
5603   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5604   PetscFunctionReturn(0);
5605 }
5606 
5607 /*
5608     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5609     of the OFF-DIAGONAL portion of local A
5610 
5611     Collective on Mat
5612 
5613    Input Parameters:
5614 +    A,B - the matrices in mpiaij format
5615 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5616 
5617    Output Parameter:
5618 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5619 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5620 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5621 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5622 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5625 
5626     Level: developer
5627 
5628 */
5629 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5630 {
5631   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5632   Mat_SeqAIJ             *b_oth;
5633   VecScatter             ctx;
5634   MPI_Comm               comm;
5635   const PetscMPIInt      *rprocs,*sprocs;
5636   const PetscInt         *srow,*rstarts,*sstarts;
5637   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5638   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5639   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5640   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5641   PetscMPIInt            size,tag,rank,nreqs;
5642 
5643   PetscFunctionBegin;
5644   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
5645   PetscCallMPI(MPI_Comm_size(comm,&size));
5646 
5647   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5648     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5649   }
5650   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
5651   PetscCallMPI(MPI_Comm_rank(comm,&rank));
5652 
5653   if (size == 1) {
5654     startsj_s = NULL;
5655     bufa_ptr  = NULL;
5656     *B_oth    = NULL;
5657     PetscFunctionReturn(0);
5658   }
5659 
5660   ctx = a->Mvctx;
5661   tag = ((PetscObject)ctx)->tag;
5662 
5663   PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
5664   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5665   PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
5666   PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
5667   PetscCall(PetscMalloc1(nreqs,&reqs));
5668   rwaits = reqs;
5669   swaits = reqs + nrecvs;
5670 
5671   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5672   if (scall == MAT_INITIAL_MATRIX) {
5673     /* i-array */
5674     /*---------*/
5675     /*  post receives */
5676     if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
5677     for (i=0; i<nrecvs; i++) {
5678       rowlen = rvalues + rstarts[i]*rbs;
5679       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5680       PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5681     }
5682 
5683     /* pack the outgoing message */
5684     PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));
5685 
5686     sstartsj[0] = 0;
5687     rstartsj[0] = 0;
5688     len         = 0; /* total length of j or a array to be sent */
5689     if (nsends) {
5690       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5691       PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
5692     }
5693     for (i=0; i<nsends; i++) {
5694       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5695       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5696       for (j=0; j<nrows; j++) {
5697         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5698         for (l=0; l<sbs; l++) {
5699           PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */
5700 
5701           rowlen[j*sbs+l] = ncols;
5702 
5703           len += ncols;
5704           PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
5705         }
5706         k++;
5707       }
5708       PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));
5709 
5710       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5711     }
5712     /* recvs and sends of i-array are completed */
5713     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5714     PetscCall(PetscFree(svalues));
5715 
5716     /* allocate buffers for sending j and a arrays */
5717     PetscCall(PetscMalloc1(len+1,&bufj));
5718     PetscCall(PetscMalloc1(len+1,&bufa));
5719 
5720     /* create i-array of B_oth */
5721     PetscCall(PetscMalloc1(aBn+2,&b_othi));
5722 
5723     b_othi[0] = 0;
5724     len       = 0; /* total length of j or a array to be received */
5725     k         = 0;
5726     for (i=0; i<nrecvs; i++) {
5727       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5728       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5729       for (j=0; j<nrows; j++) {
5730         b_othi[k+1] = b_othi[k] + rowlen[j];
5731         PetscCall(PetscIntSumError(rowlen[j],len,&len));
5732         k++;
5733       }
5734       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5735     }
5736     PetscCall(PetscFree(rvalues));
5737 
5738     /* allocate space for j and a arrays of B_oth */
5739     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
5740     PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));
5741 
5742     /* j-array */
5743     /*---------*/
5744     /*  post receives of j-array */
5745     for (i=0; i<nrecvs; i++) {
5746       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5747       PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
5748     }
5749 
5750     /* pack the outgoing message j-array */
5751     if (nsends) k = sstarts[0];
5752     for (i=0; i<nsends; i++) {
5753       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5754       bufJ  = bufj+sstartsj[i];
5755       for (j=0; j<nrows; j++) {
5756         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5757         for (ll=0; ll<sbs; ll++) {
5758           PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5759           for (l=0; l<ncols; l++) {
5760             *bufJ++ = cols[l];
5761           }
5762           PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
5763         }
5764       }
5765       PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
5766     }
5767 
5768     /* recvs and sends of j-array are completed */
5769     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5770   } else if (scall == MAT_REUSE_MATRIX) {
5771     sstartsj = *startsj_s;
5772     rstartsj = *startsj_r;
5773     bufa     = *bufa_ptr;
5774     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5775     PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5776   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5777 
5778   /* a-array */
5779   /*---------*/
5780   /*  post receives of a-array */
5781   for (i=0; i<nrecvs; i++) {
5782     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5783     PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5784   }
5785 
5786   /* pack the outgoing message a-array */
5787   if (nsends) k = sstarts[0];
5788   for (i=0; i<nsends; i++) {
5789     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5790     bufA  = bufa+sstartsj[i];
5791     for (j=0; j<nrows; j++) {
5792       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5793       for (ll=0; ll<sbs; ll++) {
5794         PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5795         for (l=0; l<ncols; l++) {
5796           *bufA++ = vals[l];
5797         }
5798         PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5799       }
5800     }
5801     PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5802   }
5803   /* recvs and sends of a-array are completed */
5804   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5805   PetscCall(PetscFree(reqs));
5806 
5807   if (scall == MAT_INITIAL_MATRIX) {
5808     /* put together the new matrix */
5809     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5810 
5811     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5812     /* Since these are PETSc arrays, change flags to free them as necessary. */
5813     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5814     b_oth->free_a  = PETSC_TRUE;
5815     b_oth->free_ij = PETSC_TRUE;
5816     b_oth->nonew   = 0;
5817 
5818     PetscCall(PetscFree(bufj));
5819     if (!startsj_s || !bufa_ptr) {
5820       PetscCall(PetscFree2(sstartsj,rstartsj));
5821       PetscCall(PetscFree(bufa_ptr));
5822     } else {
5823       *startsj_s = sstartsj;
5824       *startsj_r = rstartsj;
5825       *bufa_ptr  = bufa;
5826     }
5827   } else if (scall == MAT_REUSE_MATRIX) {
5828     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5829   }
5830 
5831   PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5832   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5833   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5834   PetscFunctionReturn(0);
5835 }
5836 
5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5838 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5839 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5840 #if defined(PETSC_HAVE_MKL_SPARSE)
5841 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5842 #endif
5843 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5844 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5845 #if defined(PETSC_HAVE_ELEMENTAL)
5846 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5847 #endif
5848 #if defined(PETSC_HAVE_SCALAPACK)
5849 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5850 #endif
5851 #if defined(PETSC_HAVE_HYPRE)
5852 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5853 #endif
5854 #if defined(PETSC_HAVE_CUDA)
5855 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5856 #endif
5857 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5858 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5859 #endif
5860 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5861 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5862 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5863 
5864 /*
5865     Computes (B'*A')' since computing B*A directly is untenable
5866 
5867                n                       p                          p
5868         [             ]       [             ]         [                 ]
5869       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5870         [             ]       [             ]         [                 ]
5871 
5872 */
5873 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5874 {
5875   Mat            At,Bt,Ct;
5876 
5877   PetscFunctionBegin;
5878   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5879   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5880   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5881   PetscCall(MatDestroy(&At));
5882   PetscCall(MatDestroy(&Bt));
5883   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5884   PetscCall(MatDestroy(&Ct));
5885   PetscFunctionReturn(0);
5886 }
5887 
5888 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5889 {
5890   PetscBool      cisdense;
5891 
5892   PetscFunctionBegin;
5893   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5894   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5895   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5896   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5897   if (!cisdense) {
5898     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5899   }
5900   PetscCall(MatSetUp(C));
5901 
5902   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5903   PetscFunctionReturn(0);
5904 }
5905 
5906 /* ----------------------------------------------------------------*/
5907 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5908 {
5909   Mat_Product *product = C->product;
5910   Mat         A = product->A,B=product->B;
5911 
5912   PetscFunctionBegin;
5913   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5914     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5915 
5916   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5917   C->ops->productsymbolic = MatProductSymbolic_AB;
5918   PetscFunctionReturn(0);
5919 }
5920 
5921 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5922 {
5923   Mat_Product    *product = C->product;
5924 
5925   PetscFunctionBegin;
5926   if (product->type == MATPRODUCT_AB) {
5927     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5928   }
5929   PetscFunctionReturn(0);
5930 }
5931 
5932 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5933    is greater than value, or last if there is no such element.
5934 */
5935 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5936 {
5937   PetscCount  it,step,count = last - first;
5938 
5939   PetscFunctionBegin;
5940   while (count > 0) {
5941     it   = first;
5942     step = count / 2;
5943     it  += step;
5944     if (!(value < array[it])) {
5945       first  = ++it;
5946       count -= step + 1;
5947     } else count = step;
5948   }
5949   *upper = first;
5950   PetscFunctionReturn(0);
5951 }
5952 
5953 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
5954 
5955   Input Parameters:
5956 
5957     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5958     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5959 
5960     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
5961 
5962     For Set1, j1[] contains column indices of the nonzeros.
5963     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5965     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5966 
5967     Similar for Set2.
5968 
5969     This routine merges the two sets of nonzeros row by row and removes repeats.
5970 
5971   Output Parameters: (memories are allocated by the caller)
5972 
5973     i[],j[]: the CSR of the merged matrix, which has m rows.
5974     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5975     imap2[]: similar to imap1[], but for Set2.
5976     Note we order nonzeros row-by-row and from left to right.
5977 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* r: row index of mat; m: number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2; /* b1/e1,b2/e2: per-row [begin,end) cursors into j1[],j2[] */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Two-way merge of the sorted (possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t; /* The t1-th unique entry of Set1 becomes the t-th unique entry of the merged set */
        imap2[t2] = t; /* Likewise for the t2-th unique entry of Set2 */
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Entry exists only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Entry exists only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: row r of the merged matrix has i[r+1]-i[r] unique nonzeros */
  }
  PetscFunctionReturn(0);
}
6031 
6032 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6033 
6034   Input Parameters:
6035     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6036     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6037       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6038 
6039       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6040       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6041 
6042   Output Parameters:
6043     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6044     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6045       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6046       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6047 
6048     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6049       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6050         repeats (i.e., same 'i,j' pair).
6051       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6052         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6053 
6054       Atot: number of entries belonging to the diagonal block
6055       Annz: number of unique nonzeros belonging to the diagonal block.
6056 
6057     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6058 
6059     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6060 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart; /* Number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the bound below permits j[p] == mat->cmap->N though valid columns are [0,N) — confirm intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) { /* Off-diagonal indices were never shifted, so only advance past repeats */
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets for the second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* p-q repeats of the Annz-th unique diag nonzero */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); /* p-q repeats of the Bnnz-th unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output; ownership of Aperm/Bperm/Ajmap/Bjmap (one PetscMalloc4) transfers to the caller */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6157 
6158 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6159 {
6160   MPI_Comm                  comm;
6161   PetscMPIInt               rank,size;
6162   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6163   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6164   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6165 
6166   PetscFunctionBegin;
6167   PetscCall(PetscFree(mpiaij->garray));
6168   PetscCall(VecDestroy(&mpiaij->lvec));
6169 #if defined(PETSC_USE_CTABLE)
6170   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6171 #else
6172   PetscCall(PetscFree(mpiaij->colmap));
6173 #endif
6174   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6175   mat->assembled = PETSC_FALSE;
6176   mat->was_assembled = PETSC_FALSE;
6177   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6178 
6179   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6180   PetscCallMPI(MPI_Comm_size(comm,&size));
6181   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6182   PetscCall(PetscLayoutSetUp(mat->rmap));
6183   PetscCall(PetscLayoutSetUp(mat->cmap));
6184   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6185   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6186   PetscCall(MatGetLocalSize(mat,&m,&n));
6187   PetscCall(MatGetSize(mat,&M,&N));
6188 
6189   /* ---------------------------------------------------------------------------*/
6190   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6191   /* entries come first, then local rows, then remote rows.                     */
6192   /* ---------------------------------------------------------------------------*/
6193   PetscCount n1 = coo_n,*perm1;
6194   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6195   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6196   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6197   PetscCall(PetscArraycpy(j1,coo_j,n1));
6198   for (k=0; k<n1; k++) perm1[k] = k;
6199 
6200   /* Manipulate indices so that entries with negative row or col indices will have smallest
6201      row indices, local entries will have greater but negative row indices, and remote entries
6202      will have positive row indices.
6203   */
6204   for (k=0; k<n1; k++) {
6205     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6206     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6207     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6208     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6209   }
6210 
6211   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6212   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6213   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6214   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6215   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6216 
6217   /* ---------------------------------------------------------------------------*/
6218   /*           Split local rows into diag/offdiag portions                      */
6219   /* ---------------------------------------------------------------------------*/
6220   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6221   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6222   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6223 
6224   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6225   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6226   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6227 
6228   /* ---------------------------------------------------------------------------*/
6229   /*           Send remote rows to their owner                                  */
6230   /* ---------------------------------------------------------------------------*/
6231   /* Find which rows should be sent to which remote ranks*/
6232   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6233   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6234   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6235   const PetscInt *ranges;
6236   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6237 
6238   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6239   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6240   for (k=rem; k<n1;) {
6241     PetscMPIInt  owner;
6242     PetscInt     firstRow,lastRow;
6243 
6244     /* Locate a row range */
6245     firstRow = i1[k]; /* first row of this owner */
6246     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6247     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6248 
6249     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6250     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6251 
6252     /* All entries in [k,p) belong to this remote owner */
6253     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6254       PetscMPIInt *sendto2;
6255       PetscInt    *nentries2;
6256       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6257 
6258       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6259       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6260       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6261       PetscCall(PetscFree2(sendto,nentries2));
6262       sendto      = sendto2;
6263       nentries    = nentries2;
6264       maxNsend    = maxNsend2;
6265     }
6266     sendto[nsend]   = owner;
6267     nentries[nsend] = p - k;
6268     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6269     nsend++;
6270     k = p;
6271   }
6272 
6273   /* Build 1st SF to know offsets on remote to send data */
6274   PetscSF     sf1;
6275   PetscInt    nroots = 1,nroots2 = 0;
6276   PetscInt    nleaves = nsend,nleaves2 = 0;
6277   PetscInt    *offsets;
6278   PetscSFNode *iremote;
6279 
6280   PetscCall(PetscSFCreate(comm,&sf1));
6281   PetscCall(PetscMalloc1(nsend,&iremote));
6282   PetscCall(PetscMalloc1(nsend,&offsets));
6283   for (k=0; k<nsend; k++) {
6284     iremote[k].rank  = sendto[k];
6285     iremote[k].index = 0;
6286     nleaves2        += nentries[k];
6287     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6288   }
6289   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6290   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6291   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6292   PetscCall(PetscSFDestroy(&sf1));
6293   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6294 
6295   /* Build 2nd SF to send remote COOs to their owner */
6296   PetscSF sf2;
6297   nroots  = nroots2;
6298   nleaves = nleaves2;
6299   PetscCall(PetscSFCreate(comm,&sf2));
6300   PetscCall(PetscSFSetFromOptions(sf2));
6301   PetscCall(PetscMalloc1(nleaves,&iremote));
6302   p       = 0;
6303   for (k=0; k<nsend; k++) {
6304     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6305     for (q=0; q<nentries[k]; q++,p++) {
6306       iremote[p].rank  = sendto[k];
6307       iremote[p].index = offsets[k] + q;
6308     }
6309   }
6310   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6311 
6312   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6313   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6314 
6315   /* Send the remote COOs to their owner */
6316   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6317   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6318   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6319   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6320   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6321   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6322   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6323 
6324   PetscCall(PetscFree(offsets));
6325   PetscCall(PetscFree2(sendto,nentries));
6326 
6327   /* ---------------------------------------------------------------*/
6328   /* Sort received COOs by row along with the permutation array     */
6329   /* ---------------------------------------------------------------*/
6330   for (k=0; k<n2; k++) perm2[k] = k;
6331   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6332 
6333   /* ---------------------------------------------------------------*/
6334   /* Split received COOs into diag/offdiag portions                 */
6335   /* ---------------------------------------------------------------*/
6336   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6337   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6338   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6339 
6340   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6341   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6342 
6343   /* --------------------------------------------------------------------------*/
6344   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6345   /* --------------------------------------------------------------------------*/
6346   PetscInt   *Ai,*Bi;
6347   PetscInt   *Aj,*Bj;
6348 
6349   PetscCall(PetscMalloc1(m+1,&Ai));
6350   PetscCall(PetscMalloc1(m+1,&Bi));
6351   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6352   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6353 
6354   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6355   PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2));
6356 
6357   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6358   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6359   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6360   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6361   PetscCall(PetscFree3(i1,j1,perm1));
6362   PetscCall(PetscFree3(i2,j2,perm2));
6363 
6364   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6365   PetscInt Annz = Ai[m];
6366   PetscInt Bnnz = Bi[m];
6367   if (Annz < Annz1 + Annz2) {
6368     PetscInt *Aj_new;
6369     PetscCall(PetscMalloc1(Annz,&Aj_new));
6370     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6371     PetscCall(PetscFree(Aj));
6372     Aj   = Aj_new;
6373   }
6374 
6375   if (Bnnz < Bnnz1 + Bnnz2) {
6376     PetscInt *Bj_new;
6377     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6378     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6379     PetscCall(PetscFree(Bj));
6380     Bj   = Bj_new;
6381   }
6382 
6383   /* --------------------------------------------------------------------------------*/
6384   /* Create new submatrices for on-process and off-process coupling                  */
6385   /* --------------------------------------------------------------------------------*/
6386   PetscScalar   *Aa,*Ba;
6387   MatType       rtype;
6388   Mat_SeqAIJ    *a,*b;
6389   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6390   PetscCall(PetscCalloc1(Bnnz,&Ba));
6391   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6392   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6393   PetscCall(MatDestroy(&mpiaij->A));
6394   PetscCall(MatDestroy(&mpiaij->B));
6395   PetscCall(MatGetRootType_Private(mat,&rtype));
6396   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6397   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6398   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6399 
6400   a = (Mat_SeqAIJ*)mpiaij->A->data;
6401   b = (Mat_SeqAIJ*)mpiaij->B->data;
6402   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6403   a->free_a       = b->free_a       = PETSC_TRUE;
6404   a->free_ij      = b->free_ij      = PETSC_TRUE;
6405 
6406   /* conversion must happen AFTER multiply setup */
6407   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6408   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6409   PetscCall(VecDestroy(&mpiaij->lvec));
6410   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6411   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6412 
6413   mpiaij->coo_n   = coo_n;
6414   mpiaij->coo_sf  = sf2;
6415   mpiaij->sendlen = nleaves;
6416   mpiaij->recvlen = nroots;
6417 
6418   mpiaij->Annz1   = Annz1;
6419   mpiaij->Annz2   = Annz2;
6420   mpiaij->Bnnz1   = Bnnz1;
6421   mpiaij->Bnnz2   = Bnnz2;
6422 
6423   mpiaij->Atot1   = Atot1;
6424   mpiaij->Atot2   = Atot2;
6425   mpiaij->Btot1   = Btot1;
6426   mpiaij->Btot2   = Btot2;
6427 
6428   mpiaij->Aimap1  = Aimap1;
6429   mpiaij->Aimap2  = Aimap2;
6430   mpiaij->Bimap1  = Bimap1;
6431   mpiaij->Bimap2  = Bimap2;
6432 
6433   mpiaij->Ajmap1  = Ajmap1;
6434   mpiaij->Ajmap2  = Ajmap2;
6435   mpiaij->Bjmap1  = Bjmap1;
6436   mpiaij->Bjmap2  = Bjmap2;
6437 
6438   mpiaij->Aperm1  = Aperm1;
6439   mpiaij->Aperm2  = Aperm2;
6440   mpiaij->Bperm1  = Bperm1;
6441   mpiaij->Bperm2  = Bperm2;
6442 
6443   mpiaij->Cperm1  = Cperm1;
6444 
6445   /* Allocate in preallocation. If not used, it has zero cost on host */
6446   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6447   PetscFunctionReturn(0);
6448 }
6449 
/*
  MatSetValuesCOO_MPIAIJ - Set the matrix values v[], given in the order of the COO (i,j)
  pairs previously supplied to MatSetPreallocationCOO_MPIAIJ().

  The imap/jmap/perm arrays built during preallocation map each COO entry to its slot in the
  diagonal block A or the off-diagonal block B (suffix 1 = locally owned entries, suffix 2 =
  entries received from remote processes). Entries destined for other processes are packed via
  Cperm1 and shipped through the star forest mpiaij->coo_sf; the send is overlapped with the
  local insertion.

  Input Parameters:
+ mat   - the MPIAIJ matrix, preallocated with MatSetPreallocationCOO_MPIAIJ()
. v     - array of values, one per COO pair passed at preallocation
- imode - INSERT_VALUES (overwrite) or ADD_VALUES (accumulate)
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B;
  PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));
  if (imode == INSERT_VALUES) {
    /* INSERT is implemented as "zero then add": clear existing values so the += below overwrites */
    PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar)));
    PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar)));
  }

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz1; i++) {
    /* all COO duplicates of nonzero i are summed into its single CSR slot Aimap1[i] */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
  }
  for (PetscCount i=0; i<Bnnz1; i++) {
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6496 
6497 /* ----------------------------------------------------------------*/
6498 
6499 /*MC
6500    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6501 
6502    Options Database Keys:
6503 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6504 
6505    Level: beginner
6506 
6507    Notes:
6508     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6509     in this case the values associated with the rows and columns one passes in are set to zero
6510     in the matrix
6511 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6514 
6515 .seealso: MatCreateAIJ()
6516 M*/
6517 
/*
   MatCreate_MPIAIJ - Constructor invoked by MatSetType(B,MATMPIAIJ).

   Allocates the Mat_MPIAIJ implementation struct, installs the MPIAIJ operations
   table, creates the stash used to collect entries destined for other processes,
   and composes the type-specific methods (preallocation, conversions, COO assembly,
   matrix products) that generic code looks up with PetscObjectQueryFunction().
   The composed name strings are the public query keys and must not change.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* created lazily when off-diagonal columns are first needed */
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose type-specific methods queried at runtime by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6597 
6598 /*@C
6599      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6600          and "off-diagonal" part of the matrix in CSR format.
6601 
6602    Collective
6603 
6604    Input Parameters:
6605 +  comm - MPI communicator
6606 .  m - number of local rows (Cannot be PETSC_DECIDE)
6607 .  n - This value should be the same as the local size used in creating the
6608        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6609        calculated if N is given) For square matrices n is almost always m.
6610 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6611 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6612 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6613 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6614 .   a - matrix values
6615 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6616 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6617 -   oa - matrix values
6618 
6619    Output Parameter:
6620 .   mat - the matrix
6621 
6622    Level: advanced
6623 
6624    Notes:
6625        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6626        must free the arrays once the matrix has been destroyed and not before.
6627 
6628        The i and j indices are 0 based
6629 
6630        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6631 
6632        This sets local rows and cannot be used to set off-processor values.
6633 
6634        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6635        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6636        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6637        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6638        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6639        communication if it is known that only local entries will be set.
6640 
6641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6642           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6643 @*/
6644 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6645 {
6646   Mat_MPIAIJ     *maij;
6647 
6648   PetscFunctionBegin;
6649   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6650   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6651   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6652   PetscCall(MatCreate(comm,mat));
6653   PetscCall(MatSetSizes(*mat,m,n,M,N));
6654   PetscCall(MatSetType(*mat,MATMPIAIJ));
6655   maij = (Mat_MPIAIJ*) (*mat)->data;
6656 
6657   (*mat)->preallocated = PETSC_TRUE;
6658 
6659   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6660   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6661 
6662   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6663   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6664 
6665   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6666   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6667   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6668   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6669   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6670   PetscFunctionReturn(0);
6671 }
6672 
/* Per-product state for the backend (device-capable) MPIAIJ matrix-product
   implementation. Attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND()
   and released by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/receive row offsets reused on MAT_REUSE_MATRIX calls */
  PetscScalar *bufa;                 /* communication buffer for P_oth values */
  Mat         P_oth;                 /* rows of P owned by other processes, needed by A_off * P_oth */

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the SF-managed coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local B instead of B_diag/B_off separately */
  PetscBool P_oth_bind; /* bind P_oth to the CPU (option -mat*_backend_pothbind) */
} MatMatMPIAIJBACKEND;
6703 
6704 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6705 {
6706   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6707   PetscInt            i;
6708 
6709   PetscFunctionBegin;
6710   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6711   PetscCall(PetscFree(mmdata->bufa));
6712   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6713   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6714   PetscCall(MatDestroy(&mmdata->P_oth));
6715   PetscCall(MatDestroy(&mmdata->Bloc));
6716   PetscCall(PetscSFDestroy(&mmdata->sf));
6717   for (i = 0; i < mmdata->cp; i++) {
6718     PetscCall(MatDestroy(&mmdata->mp[i]));
6719   }
6720   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6721   PetscCall(PetscFree(mmdata->own[0]));
6722   PetscCall(PetscFree(mmdata->own));
6723   PetscCall(PetscFree(mmdata->off[0]));
6724   PetscCall(PetscFree(mmdata->off));
6725   PetscCall(PetscFree(mmdata));
6726   PetscFunctionReturn(0);
6727 }
6728 
6729 /* Copy selected n entries with indices in idx[] of A to v[].
6730    If idx is NULL, copy the whole data array of A to v[]
6731  */
6732 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6733 {
6734   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6735 
6736   PetscFunctionBegin;
6737   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6738   if (f) {
6739     PetscCall((*f)(A,n,idx,v));
6740   } else {
6741     const PetscScalar *vv;
6742 
6743     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6744     if (n && idx) {
6745       PetscScalar    *w = v;
6746       const PetscInt *oi = idx;
6747       PetscInt       j;
6748 
6749       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6750     } else {
6751       PetscCall(PetscArraycpy(v,vv,n));
6752     }
6753     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6754   }
6755   PetscFunctionReturn(0);
6756 }
6757 
/*
  MatProductNumeric_MPIAIJBACKEND - Numeric phase of a backend MPIAIJ matrix product
  prepared by MatProductSymbolic_MPIAIJBACKEND().

  Refreshes the temporary operands (P_oth, Bloc) when needed, reruns the numeric
  phase of every intermediate sequential product mp[i], copies their nonzeros into
  the COO value buffers (coo_v for on-process destinations, coo_w for values owned
  by other processes), gathers coo_w onto the owners via mmdata->sf, and finally
  assembles C with MatSetValuesCOO().
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* reusesym only skips the update on the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* scatter each (non-temporary) product's values into the COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporaries only feed later products; not copied to C */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      /* split mp[i]'s nonzeros between off-process (coo_w) and on-process (coo_v) slots */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries: copy the whole data array of mp[i] */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received off-process values land right after the on-process ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6806 
6807 /* Support for Pt * A, A * P, or Pt * A * P */
6808 #define MAX_NUMBER_INTERMEDIATE 4
6809 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6810 {
6811   Mat_Product            *product = C->product;
6812   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6813   Mat_MPIAIJ             *a,*p;
6814   MatMatMPIAIJBACKEND    *mmdata;
6815   ISLocalToGlobalMapping P_oth_l2g = NULL;
6816   IS                     glob = NULL;
6817   const char             *prefix;
6818   char                   pprefix[256];
6819   const PetscInt         *globidx,*P_oth_idx;
6820   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6821   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6822   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6823                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6824                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6825   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6826 
6827   MatProductType         ptype;
6828   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6829   PetscMPIInt            size;
6830   PetscErrorCode         ierr;
6831 
6832   PetscFunctionBegin;
6833   MatCheckProduct(C,1);
6834   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6835   ptype = product->type;
6836   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6837     ptype = MATPRODUCT_AB;
6838     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6839   }
6840   switch (ptype) {
6841   case MATPRODUCT_AB:
6842     A = product->A;
6843     P = product->B;
6844     m = A->rmap->n;
6845     n = P->cmap->n;
6846     M = A->rmap->N;
6847     N = P->cmap->N;
6848     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6849     break;
6850   case MATPRODUCT_AtB:
6851     P = product->A;
6852     A = product->B;
6853     m = P->cmap->n;
6854     n = A->cmap->n;
6855     M = P->cmap->N;
6856     N = A->cmap->N;
6857     hasoffproc = PETSC_TRUE;
6858     break;
6859   case MATPRODUCT_PtAP:
6860     A = product->A;
6861     P = product->B;
6862     m = P->cmap->n;
6863     n = P->cmap->n;
6864     M = P->cmap->N;
6865     N = P->cmap->N;
6866     hasoffproc = PETSC_TRUE;
6867     break;
6868   default:
6869     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6870   }
6871   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6872   if (size == 1) hasoffproc = PETSC_FALSE;
6873 
6874   /* defaults */
6875   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6876     mp[i]    = NULL;
6877     mptmp[i] = PETSC_FALSE;
6878     rmapt[i] = -1;
6879     cmapt[i] = -1;
6880     rmapa[i] = NULL;
6881     cmapa[i] = NULL;
6882   }
6883 
6884   /* customization */
6885   PetscCall(PetscNew(&mmdata));
6886   mmdata->reusesym = product->api_user;
6887   if (ptype == MATPRODUCT_AB) {
6888     if (product->api_user) {
6889       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");PetscCall(ierr);
6890       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6891       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6892       ierr = PetscOptionsEnd();PetscCall(ierr);
6893     } else {
6894       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
6895       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6896       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6897       ierr = PetscOptionsEnd();PetscCall(ierr);
6898     }
6899   } else if (ptype == MATPRODUCT_PtAP) {
6900     if (product->api_user) {
6901       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");PetscCall(ierr);
6902       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6903       ierr = PetscOptionsEnd();PetscCall(ierr);
6904     } else {
6905       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
6906       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6907       ierr = PetscOptionsEnd();PetscCall(ierr);
6908     }
6909   }
6910   a = (Mat_MPIAIJ*)A->data;
6911   p = (Mat_MPIAIJ*)P->data;
6912   PetscCall(MatSetSizes(C,m,n,M,N));
6913   PetscCall(PetscLayoutSetUp(C->rmap));
6914   PetscCall(PetscLayoutSetUp(C->cmap));
6915   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6916   PetscCall(MatGetOptionsPrefix(C,&prefix));
6917 
6918   cp   = 0;
6919   switch (ptype) {
6920   case MATPRODUCT_AB: /* A * P */
6921     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6922 
6923     /* A_diag * P_local (merged or not) */
6924     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6925       /* P is product->B */
6926       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6927       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
6928       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6929       PetscCall(MatProductSetFill(mp[cp],product->fill));
6930       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6931       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6932       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6933       mp[cp]->product->api_user = product->api_user;
6934       PetscCall(MatProductSetFromOptions(mp[cp]));
6935       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6936       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6937       PetscCall(ISGetIndices(glob,&globidx));
6938       rmapt[cp] = 1;
6939       cmapt[cp] = 2;
6940       cmapa[cp] = globidx;
6941       mptmp[cp] = PETSC_FALSE;
6942       cp++;
6943     } else { /* A_diag * P_diag and A_diag * P_off */
6944       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
6945       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6946       PetscCall(MatProductSetFill(mp[cp],product->fill));
6947       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6948       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6949       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6950       mp[cp]->product->api_user = product->api_user;
6951       PetscCall(MatProductSetFromOptions(mp[cp]));
6952       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6953       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6954       rmapt[cp] = 1;
6955       cmapt[cp] = 1;
6956       mptmp[cp] = PETSC_FALSE;
6957       cp++;
6958       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
6959       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6960       PetscCall(MatProductSetFill(mp[cp],product->fill));
6961       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6962       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6963       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6964       mp[cp]->product->api_user = product->api_user;
6965       PetscCall(MatProductSetFromOptions(mp[cp]));
6966       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6967       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6968       rmapt[cp] = 1;
6969       cmapt[cp] = 2;
6970       cmapa[cp] = p->garray;
6971       mptmp[cp] = PETSC_FALSE;
6972       cp++;
6973     }
6974 
6975     /* A_off * P_other */
6976     if (mmdata->P_oth) {
6977       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
6978       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
6979       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
6980       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
6981       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
6982       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6983       PetscCall(MatProductSetFill(mp[cp],product->fill));
6984       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6985       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6986       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6987       mp[cp]->product->api_user = product->api_user;
6988       PetscCall(MatProductSetFromOptions(mp[cp]));
6989       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6990       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6991       rmapt[cp] = 1;
6992       cmapt[cp] = 2;
6993       cmapa[cp] = P_oth_idx;
6994       mptmp[cp] = PETSC_FALSE;
6995       cp++;
6996     }
6997     break;
6998 
6999   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7000     /* A is product->B */
7001     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7002     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7003       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7004       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7005       PetscCall(MatProductSetFill(mp[cp],product->fill));
7006       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7007       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7008       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7009       mp[cp]->product->api_user = product->api_user;
7010       PetscCall(MatProductSetFromOptions(mp[cp]));
7011       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7012       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7013       PetscCall(ISGetIndices(glob,&globidx));
7014       rmapt[cp] = 2;
7015       rmapa[cp] = globidx;
7016       cmapt[cp] = 2;
7017       cmapa[cp] = globidx;
7018       mptmp[cp] = PETSC_FALSE;
7019       cp++;
7020     } else {
7021       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7022       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7023       PetscCall(MatProductSetFill(mp[cp],product->fill));
7024       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7025       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7026       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7027       mp[cp]->product->api_user = product->api_user;
7028       PetscCall(MatProductSetFromOptions(mp[cp]));
7029       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7030       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7031       PetscCall(ISGetIndices(glob,&globidx));
7032       rmapt[cp] = 1;
7033       cmapt[cp] = 2;
7034       cmapa[cp] = globidx;
7035       mptmp[cp] = PETSC_FALSE;
7036       cp++;
7037       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7038       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7039       PetscCall(MatProductSetFill(mp[cp],product->fill));
7040       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7041       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7042       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7043       mp[cp]->product->api_user = product->api_user;
7044       PetscCall(MatProductSetFromOptions(mp[cp]));
7045       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7046       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7047       rmapt[cp] = 2;
7048       rmapa[cp] = p->garray;
7049       cmapt[cp] = 2;
7050       cmapa[cp] = globidx;
7051       mptmp[cp] = PETSC_FALSE;
7052       cp++;
7053     }
7054     break;
7055   case MATPRODUCT_PtAP:
7056     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7057     /* P is product->B */
7058     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7059     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7060     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7061     PetscCall(MatProductSetFill(mp[cp],product->fill));
7062     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7063     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7064     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7065     mp[cp]->product->api_user = product->api_user;
7066     PetscCall(MatProductSetFromOptions(mp[cp]));
7067     PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7068     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7069     PetscCall(ISGetIndices(glob,&globidx));
7070     rmapt[cp] = 2;
7071     rmapa[cp] = globidx;
7072     cmapt[cp] = 2;
7073     cmapa[cp] = globidx;
7074     mptmp[cp] = PETSC_FALSE;
7075     cp++;
7076     if (mmdata->P_oth) {
7077       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7078       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7079       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7080       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7081       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7082       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7083       PetscCall(MatProductSetFill(mp[cp],product->fill));
7084       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7085       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7086       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7087       mp[cp]->product->api_user = product->api_user;
7088       PetscCall(MatProductSetFromOptions(mp[cp]));
7089       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7090       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7091       mptmp[cp] = PETSC_TRUE;
7092       cp++;
7093       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7094       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7095       PetscCall(MatProductSetFill(mp[cp],product->fill));
7096       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7097       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7098       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7099       mp[cp]->product->api_user = product->api_user;
7100       PetscCall(MatProductSetFromOptions(mp[cp]));
7101       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7102       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7103       rmapt[cp] = 2;
7104       rmapa[cp] = globidx;
7105       cmapt[cp] = 2;
7106       cmapa[cp] = P_oth_idx;
7107       mptmp[cp] = PETSC_FALSE;
7108       cp++;
7109     }
7110     break;
7111   default:
7112     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7113   }
7114   /* sanity check */
7115   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7116 
7117   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7118   for (i = 0; i < cp; i++) {
7119     mmdata->mp[i]    = mp[i];
7120     mmdata->mptmp[i] = mptmp[i];
7121   }
7122   mmdata->cp = cp;
7123   C->product->data       = mmdata;
7124   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7125   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7126 
7127   /* memory type */
7128   mmdata->mtype = PETSC_MEMTYPE_HOST;
7129   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7130   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7131   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7132   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7133 
7134   /* prepare coo coordinates for values insertion */
7135 
7136   /* count total nonzeros of those intermediate seqaij Mats
7137     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7138     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7139     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7140   */
7141   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7142     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7143     if (mptmp[cp]) continue;
7144     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7145       const PetscInt *rmap = rmapa[cp];
7146       const PetscInt mr = mp[cp]->rmap->n;
7147       const PetscInt rs = C->rmap->rstart;
7148       const PetscInt re = C->rmap->rend;
7149       const PetscInt *ii  = mm->i;
7150       for (i = 0; i < mr; i++) {
7151         const PetscInt gr = rmap[i];
7152         const PetscInt nz = ii[i+1] - ii[i];
7153         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7154         else ncoo_oown += nz; /* this row is local */
7155       }
7156     } else ncoo_d += mm->nz;
7157   }
7158 
7159   /*
7160     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7161 
7162     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7163 
7164     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7165 
7166     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7167     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7168     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7169 
7170     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7171     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7172   */
7173   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7174   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7175 
7176   /* gather (i,j) of nonzeros inserted by remote procs */
7177   if (hasoffproc) {
7178     PetscSF  msf;
7179     PetscInt ncoo2,*coo_i2,*coo_j2;
7180 
7181     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7182     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7183     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7184 
7185     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7186       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7187       PetscInt   *idxoff = mmdata->off[cp];
7188       PetscInt   *idxown = mmdata->own[cp];
7189       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7190         const PetscInt *rmap = rmapa[cp];
7191         const PetscInt *cmap = cmapa[cp];
7192         const PetscInt *ii  = mm->i;
7193         PetscInt       *coi = coo_i + ncoo_o;
7194         PetscInt       *coj = coo_j + ncoo_o;
7195         const PetscInt mr = mp[cp]->rmap->n;
7196         const PetscInt rs = C->rmap->rstart;
7197         const PetscInt re = C->rmap->rend;
7198         const PetscInt cs = C->cmap->rstart;
7199         for (i = 0; i < mr; i++) {
7200           const PetscInt *jj = mm->j + ii[i];
7201           const PetscInt gr  = rmap[i];
7202           const PetscInt nz  = ii[i+1] - ii[i];
7203           if (gr < rs || gr >= re) { /* this is an offproc row */
7204             for (j = ii[i]; j < ii[i+1]; j++) {
7205               *coi++ = gr;
7206               *idxoff++ = j;
7207             }
7208             if (!cmapt[cp]) { /* already global */
7209               for (j = 0; j < nz; j++) *coj++ = jj[j];
7210             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7211               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7212             } else { /* offdiag */
7213               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7214             }
7215             ncoo_o += nz;
7216           } else { /* this is a local row */
7217             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7218           }
7219         }
7220       }
7221       mmdata->off[cp + 1] = idxoff;
7222       mmdata->own[cp + 1] = idxown;
7223     }
7224 
7225     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7226     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7227     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7228     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7229     ncoo = ncoo_d + ncoo_oown + ncoo2;
7230     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7231     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7232     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7233     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7234     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7235     PetscCall(PetscFree2(coo_i,coo_j));
7236     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7237     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7238     coo_i = coo_i2;
7239     coo_j = coo_j2;
7240   } else { /* no offproc values insertion */
7241     ncoo = ncoo_d;
7242     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7243 
7244     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7245     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7246     PetscCall(PetscSFSetUp(mmdata->sf));
7247   }
7248   mmdata->hasoffproc = hasoffproc;
7249 
7250   /* gather (i,j) of nonzeros inserted locally */
7251   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7252     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7253     PetscInt       *coi = coo_i + ncoo_d;
7254     PetscInt       *coj = coo_j + ncoo_d;
7255     const PetscInt *jj  = mm->j;
7256     const PetscInt *ii  = mm->i;
7257     const PetscInt *cmap = cmapa[cp];
7258     const PetscInt *rmap = rmapa[cp];
7259     const PetscInt mr = mp[cp]->rmap->n;
7260     const PetscInt rs = C->rmap->rstart;
7261     const PetscInt re = C->rmap->rend;
7262     const PetscInt cs = C->cmap->rstart;
7263 
7264     if (mptmp[cp]) continue;
7265     if (rmapt[cp] == 1) { /* consecutive rows */
7266       /* fill coo_i */
7267       for (i = 0; i < mr; i++) {
7268         const PetscInt gr = i + rs;
7269         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7270       }
7271       /* fill coo_j */
7272       if (!cmapt[cp]) { /* type-0, already global */
7273         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7274       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7275         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7276       } else { /* type-2, local to global for sparse columns */
7277         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7278       }
7279       ncoo_d += mm->nz;
7280     } else if (rmapt[cp] == 2) { /* sparse rows */
7281       for (i = 0; i < mr; i++) {
7282         const PetscInt *jj = mm->j + ii[i];
7283         const PetscInt gr  = rmap[i];
7284         const PetscInt nz  = ii[i+1] - ii[i];
7285         if (gr >= rs && gr < re) { /* local rows */
7286           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7287           if (!cmapt[cp]) { /* type-0, already global */
7288             for (j = 0; j < nz; j++) *coj++ = jj[j];
7289           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7290             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7291           } else { /* type-2, local to global for sparse columns */
7292             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7293           }
7294           ncoo_d += nz;
7295         }
7296       }
7297     }
7298   }
7299   if (glob) {
7300     PetscCall(ISRestoreIndices(glob,&globidx));
7301   }
7302   PetscCall(ISDestroy(&glob));
7303   if (P_oth_l2g) {
7304     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7305   }
7306   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7307   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7308   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7309 
7310   /* preallocate with COO data */
7311   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7312   PetscCall(PetscFree2(coo_i,coo_j));
7313   PetscFunctionReturn(0);
7314 }
7315 
7316 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7317 {
7318   Mat_Product *product = mat->product;
7319 #if defined(PETSC_HAVE_DEVICE)
7320   PetscBool    match   = PETSC_FALSE;
7321   PetscBool    usecpu  = PETSC_FALSE;
7322 #else
7323   PetscBool    match   = PETSC_TRUE;
7324 #endif
7325 
7326   PetscFunctionBegin;
7327   MatCheckProduct(mat,1);
7328 #if defined(PETSC_HAVE_DEVICE)
7329   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7330     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7331   }
7332   if (match) { /* we can always fallback to the CPU if requested */
7333     PetscErrorCode ierr;
7334     switch (product->type) {
7335     case MATPRODUCT_AB:
7336       if (product->api_user) {
7337         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");PetscCall(ierr);
7338         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7339         ierr = PetscOptionsEnd();PetscCall(ierr);
7340       } else {
7341         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
7342         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7343         ierr = PetscOptionsEnd();PetscCall(ierr);
7344       }
7345       break;
7346     case MATPRODUCT_AtB:
7347       if (product->api_user) {
7348         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");PetscCall(ierr);
7349         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7350         ierr = PetscOptionsEnd();PetscCall(ierr);
7351       } else {
7352         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");PetscCall(ierr);
7353         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7354         ierr = PetscOptionsEnd();PetscCall(ierr);
7355       }
7356       break;
7357     case MATPRODUCT_PtAP:
7358       if (product->api_user) {
7359         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");PetscCall(ierr);
7360         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7361         ierr = PetscOptionsEnd();PetscCall(ierr);
7362       } else {
7363         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
7364         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7365         ierr = PetscOptionsEnd();PetscCall(ierr);
7366       }
7367       break;
7368     default:
7369       break;
7370     }
7371     match = (PetscBool)!usecpu;
7372   }
7373 #endif
7374   if (match) {
7375     switch (product->type) {
7376     case MATPRODUCT_AB:
7377     case MATPRODUCT_AtB:
7378     case MATPRODUCT_PtAP:
7379       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7380       break;
7381     default:
7382       break;
7383     }
7384   }
7385   /* fallback to MPIAIJ ops */
7386   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7387   PetscFunctionReturn(0);
7388 }
7389 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: both redefinitions below report errors through the *_ierr output argument of the enclosing
   void Fortran-binding function and then 'return' (no value); they are undone at the end of this file */
#undef  PetscCall
#define PetscCall(...) do {                                                                    \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                                                  \
    }                                                                                          \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                                            \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                                                    \
  } while (0)

/* Map the C symbol to the compiler's Fortran name-mangling convention (all-caps or no trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
7418 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7419 {
7420   Mat          mat  = *mmat;
7421   PetscInt     m    = *mm, n = *mn;
7422   InsertMode   addv = *maddv;
7423   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7424   PetscScalar  value;
7425 
7426   MatCheckPreallocated(mat,1);
7427   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7428   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7429   {
7430     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7431     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7432     PetscBool roworiented = aij->roworiented;
7433 
7434     /* Some Variables required in the macro */
7435     Mat        A                    = aij->A;
7436     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7437     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7438     MatScalar  *aa;
7439     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7440     Mat        B                    = aij->B;
7441     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7442     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7443     MatScalar  *ba;
7444     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7445      * cannot use "#if defined" inside a macro. */
7446     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7447 
7448     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7449     PetscInt  nonew = a->nonew;
7450     MatScalar *ap1,*ap2;
7451 
7452     PetscFunctionBegin;
7453     PetscCall(MatSeqAIJGetArray(A,&aa));
7454     PetscCall(MatSeqAIJGetArray(B,&ba));
7455     for (i=0; i<m; i++) {
7456       if (im[i] < 0) continue;
7457       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7458       if (im[i] >= rstart && im[i] < rend) {
7459         row      = im[i] - rstart;
7460         lastcol1 = -1;
7461         rp1      = aj + ai[row];
7462         ap1      = aa + ai[row];
7463         rmax1    = aimax[row];
7464         nrow1    = ailen[row];
7465         low1     = 0;
7466         high1    = nrow1;
7467         lastcol2 = -1;
7468         rp2      = bj + bi[row];
7469         ap2      = ba + bi[row];
7470         rmax2    = bimax[row];
7471         nrow2    = bilen[row];
7472         low2     = 0;
7473         high2    = nrow2;
7474 
7475         for (j=0; j<n; j++) {
7476           if (roworiented) value = v[i*n+j];
7477           else value = v[i+j*m];
7478           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7479           if (in[j] >= cstart && in[j] < cend) {
7480             col = in[j] - cstart;
7481             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7482           } else if (in[j] < 0) continue;
7483           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7484             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7485             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7486           } else {
7487             if (mat->was_assembled) {
7488               if (!aij->colmap) {
7489                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7490               }
7491 #if defined(PETSC_USE_CTABLE)
7492               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
7493               col--;
7494 #else
7495               col = aij->colmap[in[j]] - 1;
7496 #endif
7497               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7498                 PetscCall(MatDisAssemble_MPIAIJ(mat));
7499                 col  =  in[j];
7500                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7501                 B        = aij->B;
7502                 b        = (Mat_SeqAIJ*)B->data;
7503                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7504                 rp2      = bj + bi[row];
7505                 ap2      = ba + bi[row];
7506                 rmax2    = bimax[row];
7507                 nrow2    = bilen[row];
7508                 low2     = 0;
7509                 high2    = nrow2;
7510                 bm       = aij->B->rmap->n;
7511                 ba       = b->a;
7512                 inserted = PETSC_FALSE;
7513               }
7514             } else col = in[j];
7515             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7516           }
7517         }
7518       } else if (!aij->donotstash) {
7519         if (roworiented) {
7520           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7521         } else {
7522           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7523         }
7524       }
7525     }
7526     PetscCall(MatSeqAIJRestoreArray(A,&aa));
7527     PetscCall(MatSeqAIJRestoreArray(B,&ba));
7528   }
7529   PetscFunctionReturnVoid();
7530 }
/* Undefining these here since they were redefined above from their original definitions! No
 * other PETSc functions should be defined past this point, as the original macro
 * definitions cannot be recovered */
#undef PetscCall
#undef SETERRQ
7536