xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ef46b1a67e276116c83b5d4ce8efc2932ea4fc0a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50 
51   PetscFunctionBegin;
52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
53   A->boundtocpu = flg;
54 #endif
55   if (a->A) {
56     PetscCall(MatBindToCPU(a->A,flg));
57   }
58   if (a->B) {
59     PetscCall(MatBindToCPU(a->B,flg));
60   }
61 
62   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
63    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
64    * to differ from the parent matrix. */
65   if (a->lvec) {
66     PetscCall(VecBindToCPU(a->lvec,flg));
67   }
68   if (a->diag) {
69     PetscCall(VecBindToCPU(a->diag,flg));
70   }
71 
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
76 {
77   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
78 
79   PetscFunctionBegin;
80   if (mat->A) {
81     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
82     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
83   }
84   PetscFunctionReturn(0);
85 }
86 
/*
   MatFindNonzeroRows_MPIAIJ - Builds an index set of the locally owned rows that
   contain at least one stored entry whose value is nonzero (explicitly stored
   zeros do not count).

   Collective: a reduction over the count of all-zero rows ensures all ranks agree.
   On return *keptrows is NULL when no rank found an all-zero row (i.e. every row
   is kept); otherwise it lists, in global numbering, the rows to keep.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;  /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;  /* off-diagonal block */
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;      /* cnt: local rows whose stored values are all zero */
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* First pass: count the locally owned rows with no nonzero stored value */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;  /* structurally empty row */
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;  /* found a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;  /* found a nonzero in the off-diagonal block */
    }
    cnt++;  /* all stored values in this row are zero */
ok1:;
  }
  /* Agree globally on whether any rank has an all-zero row */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* No zero rows anywhere: leave *keptrows NULL, meaning "keep everything" */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect the m - cnt rows that do contain a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;  /* store in global numbering */
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
155 
156 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
157 {
158   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
159   PetscBool         cong;
160 
161   PetscFunctionBegin;
162   PetscCall(MatHasCongruentLayouts(Y,&cong));
163   if (Y->assembled && cong) {
164     PetscCall(MatDiagonalSet(aij->A,D,is));
165   } else {
166     PetscCall(MatDiagonalSet_Default(Y,D,is));
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
172 {
173   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
174   PetscInt       i,rstart,nrows,*rows;
175 
176   PetscFunctionBegin;
177   *zrows = NULL;
178   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
179   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
180   for (i=0; i<nrows; i++) rows[i] += rstart;
181   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
182   PetscFunctionReturn(0);
183 }
184 
/*
   MatGetColumnReductions_MPIAIJ - Computes, for every global column, a reduction
   (1/2/infinity norm, or sum/mean of the real or imaginary parts) over all
   stored entries in that column.

   reductions[] must have length N (global number of columns).  Each rank
   accumulates its local contributions into a length-N work array, then an
   MPI reduction (MAX for the infinity norm, SUM otherwise) combines them, so
   the result is identical on every rank.

   NOTE(review): for NORM_2 the accumulation is PetscAbsScalar(a[i]*a[i]); for
   complex scalars a*a is not |a|^2 -- confirm this matches the intended
   column-norm semantics.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;    /* garray maps B's compact columns to global ids */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* Get/Restore pairs force any device-side values to be synchronized to the
     host before a_aij->a and b_aij->a are read directly below. */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* Diagonal block: columns are local, offset by the ownership start */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    /* Off-diagonal block: map compact column ids to global via garray */
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* Combine the per-rank partial results */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* Accumulated squares: take the square root for the 2-norm */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* Mean is taken over the global number of rows m */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
250 
251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
252 {
253   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
254   IS              sis,gis;
255   const PetscInt  *isis,*igis;
256   PetscInt        n,*iis,nsis,ngis,rstart,i;
257 
258   PetscFunctionBegin;
259   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
260   PetscCall(MatFindNonzeroRows(a->B,&gis));
261   PetscCall(ISGetSize(gis,&ngis));
262   PetscCall(ISGetSize(sis,&nsis));
263   PetscCall(ISGetIndices(sis,&isis));
264   PetscCall(ISGetIndices(gis,&igis));
265 
266   PetscCall(PetscMalloc1(ngis+nsis,&iis));
267   PetscCall(PetscArraycpy(iis,igis,ngis));
268   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
269   n    = ngis + nsis;
270   PetscCall(PetscSortRemoveDupsInt(&n,iis));
271   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
272   for (i=0; i<n; i++) iis[i] += rstart;
273   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
274 
275   PetscCall(ISRestoreIndices(sis,&isis));
276   PetscCall(ISRestoreIndices(gis,&igis));
277   PetscCall(ISDestroy(&sis));
278   PetscCall(ISDestroy(&gis));
279   PetscFunctionReturn(0);
280 }
281 
/*
  Local utility routine that creates a mapping from the global column
  numbers to the local numbers in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process stores an order-N integer array) but is fast to access.
*/
/* Create aij->colmap: global column id -> (local B column id + 1); 0 means absent. */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;  /* number of compacted off-diagonal columns */

  PetscFunctionBegin;
  /* garray (global ids of B's columns) must exist once B has any columns */
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Scalable variant: hash table keyed by global column id + 1 (keys must be > 0) */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* Non-scalable variant: dense array of length N; entry 0 marks "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
308 
/*
   MatSetValues_SeqAIJ_A_Private - Inserts or adds a single (row,col,value) entry
   into the diagonal block A.  Performs a bounded binary then linear search of
   the row; reallocates the row (MatSeqXAIJReallocateAIJ) when a new nonzero
   must be inserted and nonew permits it.  Expects the caller
   (MatSetValues_MPIAIJ) to have initialized the working variables
   rp1/ap1/nrow1/low1/high1/lastcol1/rmax1 along with aimax/ai/aj/ailen/aa,
   nonew, ignorezeroentries and am.  (orow,ocol) are the original global
   indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
345 
/*
   MatSetValues_SeqAIJ_B_Private - Counterpart of MatSetValues_SeqAIJ_A_Private
   for the off-diagonal block B.  Uses the working variables
   rp2/ap2/nrow2/low2/high2/lastcol2/rmax2 along with bimax/bi/bj/bilen/ba,
   nonew, ignorezeroentries and bm, all initialized by the caller.
   (orow,ocol) are the original global indices, used only in error messages.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
381 
/*
   MatSetValuesRow_MPIAIJ - Overwrites one locally owned row with the values v[],
   which are given in global-column order and must match the row's existing
   nonzero pattern exactly.

   The row is split into three segments: the off-diagonal entries left of the
   diagonal block, the diagonal-block entries, and the off-diagonal entries to
   the right.  NOTE(review): the ownership-range start is used as the column
   split point, so this only works for square matrices (as the comment below
   states) -- confirm callers guarantee this.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row  = row - diag;  /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* B's columns are sorted, so the first l entries are left of the diagonal block */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    /* left-of-diagonal segment: the first l values of v */
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
419 
420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
421 {
422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
423   PetscScalar    value = 0.0;
424   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
425   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
426   PetscBool      roworiented = aij->roworiented;
427 
428   /* Some Variables required in the macro */
429   Mat        A                    = aij->A;
430   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
431   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
432   PetscBool  ignorezeroentries    = a->ignorezeroentries;
433   Mat        B                    = aij->B;
434   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
435   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
436   MatScalar  *aa,*ba;
437   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
438   PetscInt   nonew;
439   MatScalar  *ap1,*ap2;
440 
441   PetscFunctionBegin;
442   PetscCall(MatSeqAIJGetArray(A,&aa));
443   PetscCall(MatSeqAIJGetArray(B,&ba));
444   for (i=0; i<m; i++) {
445     if (im[i] < 0) continue;
446     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
447     if (im[i] >= rstart && im[i] < rend) {
448       row      = im[i] - rstart;
449       lastcol1 = -1;
450       rp1      = aj + ai[row];
451       ap1      = aa + ai[row];
452       rmax1    = aimax[row];
453       nrow1    = ailen[row];
454       low1     = 0;
455       high1    = nrow1;
456       lastcol2 = -1;
457       rp2      = bj + bi[row];
458       ap2      = ba + bi[row];
459       rmax2    = bimax[row];
460       nrow2    = bilen[row];
461       low2     = 0;
462       high2    = nrow2;
463 
464       for (j=0; j<n; j++) {
465         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
466         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
467         if (in[j] >= cstart && in[j] < cend) {
468           col   = in[j] - cstart;
469           nonew = a->nonew;
470           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
471         } else if (in[j] < 0) continue;
472         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
473         else {
474           if (mat->was_assembled) {
475             if (!aij->colmap) {
476               PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
477             }
478 #if defined(PETSC_USE_CTABLE)
479             PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
480             col--;
481 #else
482             col = aij->colmap[in[j]] - 1;
483 #endif
484             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
485               PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
486               col  =  in[j];
487               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
488               B        = aij->B;
489               b        = (Mat_SeqAIJ*)B->data;
490               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
491               rp2      = bj + bi[row];
492               ap2      = ba + bi[row];
493               rmax2    = bimax[row];
494               nrow2    = bilen[row];
495               low2     = 0;
496               high2    = nrow2;
497               bm       = aij->B->rmap->n;
498               ba       = b->a;
499             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
500               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
501                 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
502               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
503             }
504           } else col = in[j];
505           nonew = b->nonew;
506           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
507         }
508       }
509     } else {
510       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
515         } else {
516           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
517         }
518       }
519     }
520   }
521   PetscCall(MatSeqAIJRestoreArray(A,&aa));
522   PetscCall(MatSeqAIJRestoreArray(B,&ba));
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
528     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
530 */
531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
532 {
533   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
534   Mat            A           = aij->A; /* diagonal part of the matrix */
535   Mat            B           = aij->B; /* offdiagonal part of the matrix */
536   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
537   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
538   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
539   PetscInt       *ailen      = a->ilen,*aj = a->j;
540   PetscInt       *bilen      = b->ilen,*bj = b->j;
541   PetscInt       am          = aij->A->rmap->n,j;
542   PetscInt       diag_so_far = 0,dnz;
543   PetscInt       offd_so_far = 0,onz;
544 
545   PetscFunctionBegin;
546   /* Iterate over all rows of the matrix */
547   for (j=0; j<am; j++) {
548     dnz = onz = 0;
549     /*  Iterate over all non-zero columns of the current row */
550     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
551       /* If column is in the diagonal */
552       if (mat_j[col] >= cstart && mat_j[col] < cend) {
553         aj[diag_so_far++] = mat_j[col] - cstart;
554         dnz++;
555       } else { /* off-diagonal entries */
556         bj[offd_so_far++] = mat_j[col];
557         onz++;
558       }
559     }
560     ailen[j] = dnz;
561     bilen[j] = onz;
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
568     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
570     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
571     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
572 */
573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
574 {
575   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
576   Mat            A      = aij->A; /* diagonal part of the matrix */
577   Mat            B      = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
579   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
580   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
581   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
582   PetscInt       *ailen = a->ilen,*aj = a->j;
583   PetscInt       *bilen = b->ilen,*bj = b->j;
584   PetscInt       am     = aij->A->rmap->n,j;
585   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
587   PetscScalar    *aa = a->a,*ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j=0; j<am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd = full_offd_i[j];
594     rowstart_diag = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag+dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd+onz_row] = mat_j[col];
604         ba[rowstart_offd+onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/*
   MatGetValues_MPIAIJ - Retrieves values at the given (row, column) positions.
   Only locally owned rows may be queried; entries not stored in the matrix are
   returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;  /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column is in the diagonal block */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* column is in the off-diagonal block: translate the global column
             id to B's compact local numbering via the colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 or a garray mismatch means the column is not stored locally */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
652 
653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
662   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
663   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
664   PetscFunctionReturn(0);
665 }
666 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of off-process
   entries into the local blocks, assembles the diagonal (A) and off-diagonal (B)
   sequential matrices, and on the first final assembly builds the communication
   structures needed for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive stashed values sent by other ranks and insert them locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of PetscBool: result is true only if every rank is still assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* First final assembly: compact B's columns and set up the scatter used by MatMult */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* Row-access work arrays and the cached diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
747 
748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
749 {
750   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
751 
752   PetscFunctionBegin;
753   PetscCall(MatZeroEntries(l->A));
754   PetscCall(MatZeroEntries(l->B));
755   PetscFunctionReturn(0);
756 }
757 
/*
  Zero the given global rows of the distributed matrix, optionally placing diag
  on the diagonal of each zeroed row and fixing the right-hand side so that
  b[row] = diag*x[row] (the latter requires congruent row/column layouts).
  Each rank may pass any global row indices; they are mapped to local rows first.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;             /* nonzero states of both blocks before zeroing */
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;     /* congruent layouts; local/global "pattern changed" flags */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember block states so we can detect a pattern change after zeroing */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: each diagonal entry lies in the local diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;             /* saved 'nonew' settings, restored after the insertions */
    PetscBool  nnzA, nnzB;             /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;                 /* temporarily allow new nonzeros so diagonal entries can be inserted */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows past the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  /* MatSetValues() above may have stashed values for the off-diagonal part; assemble */
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;          /* bump the global state only if any rank changed its pattern */
  PetscFunctionReturn(0);
}
831 
/*
  Zero the given global rows AND the matching columns of the distributed matrix,
  optionally placing diag on the diagonal of the zeroed rows and correcting the
  right-hand side b for the known solution values x (requires congruent
  row/column layouts).  Each rank may list any global rows; ownership is
  resolved with a PetscSF reduction.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;    /* NOTE(review): local row count narrowed to PetscMPIInt; assumes it fits — confirm */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;       /* global / ghosted indicator vectors for zeroed columns */
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;  /* -1 marks "row not zeroed" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): 'rows' is cast away from const to satisfy the SF leafdata signature; it is only read */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;  /* mark zeroed rows with 1 */
  PetscCall(VecRestoreArray(xmask,&bb));
  /* scatter the mask so every rank knows which of its ghost columns are zeroed */
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;  /* maps compressed row index back to local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is zeroed: move the known contribution to the rhs, then clear the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscInt       nt;
955   VecScatter     Mvctx = a->Mvctx;
956 
957   PetscFunctionBegin;
958   PetscCall(VecGetLocalSize(xx,&nt));
959   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
960   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
961   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
962   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
963   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970 
971   PetscFunctionBegin;
972   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   VecScatter     Mvctx = a->Mvctx;
980 
981   PetscFunctionBegin;
982   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
983   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
984   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
985   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992 
993   PetscFunctionBegin;
994   /* do nondiagonal part */
995   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
996   /* do local part */
997   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
998   /* add partial results together */
999   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1000   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1001   PetscFunctionReturn(0);
1002 }
1003 
/*
  Test whether Bmat equals Amat^T to within tol.  First a cheap collective test
  on the diagonal blocks; only if that passes, the off-diagonal blocks are
  compared via submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;         /* locally-owned rows / all other columns */
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* all ranks must agree before proceeding; MPI_LAND makes the result global */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* NOTE(review): notme is sized with N but the second fill loop runs over rows up to M
     (notme[i-last+first] for i in [last,M)); sizing and fill only agree when M == N.
     Confirm this routine is restricted to globally square matrices. */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* extract A(Me,Notme) and B(Notme,Me); these must be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscFunctionBegin;
1048   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1053 {
1054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1055 
1056   PetscFunctionBegin;
1057   /* do nondiagonal part */
1058   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1059   /* do local part */
1060   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1061   /* add partial results together */
1062   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1063   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1077   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1078   PetscCall(MatGetDiagonal(a->A,v));
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085 
1086   PetscFunctionBegin;
1087   PetscCall(MatScale(a->A,aa));
1088   PetscCall(MatScale(a->B,aa));
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1093 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1094 {
1095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1096 
1097   PetscFunctionBegin;
1098   PetscCall(PetscSFDestroy(&aij->coo_sf));
1099   PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
1100   PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
1101   PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
1102   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1103   PetscCall(PetscFree(aij->Cperm1));
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112 #if defined(PETSC_USE_LOG)
1113   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1114 #endif
1115   PetscCall(MatStashDestroy_Private(&mat->stash));
1116   PetscCall(VecDestroy(&aij->diag));
1117   PetscCall(MatDestroy(&aij->A));
1118   PetscCall(MatDestroy(&aij->B));
1119 #if defined(PETSC_USE_CTABLE)
1120   PetscCall(PetscTableDestroy(&aij->colmap));
1121 #else
1122   PetscCall(PetscFree(aij->colmap));
1123 #endif
1124   PetscCall(PetscFree(aij->garray));
1125   PetscCall(VecDestroy(&aij->lvec));
1126   PetscCall(VecScatterDestroy(&aij->Mvctx));
1127   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1128   PetscCall(PetscFree(aij->ld));
1129 
1130   /* Free COO */
1131   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1132 
1133   PetscCall(PetscFree(mat->data));
1134 
1135   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1136   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1137 
1138   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1139   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1140   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1141   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1142   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1143   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1144   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1145   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1146   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1147   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1148 #if defined(PETSC_HAVE_CUDA)
1149   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1150 #endif
1151 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1153 #endif
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1155 #if defined(PETSC_HAVE_ELEMENTAL)
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1157 #endif
1158 #if defined(PETSC_HAVE_SCALAPACK)
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1160 #endif
1161 #if defined(PETSC_HAVE_HYPRE)
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1164 #endif
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1171 #if defined(PETSC_HAVE_MKL_SPARSE)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1173 #endif
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1183 {
1184   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1185   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1186   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1187   const PetscInt    *garray = aij->garray;
1188   const PetscScalar *aa,*ba;
1189   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1190   PetscInt          *rowlens;
1191   PetscInt          *colidxs;
1192   PetscScalar       *matvals;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(PetscViewerSetUp(viewer));
1196 
1197   M  = mat->rmap->N;
1198   N  = mat->cmap->N;
1199   m  = mat->rmap->n;
1200   rs = mat->rmap->rstart;
1201   cs = mat->cmap->rstart;
1202   nz = A->nz + B->nz;
1203 
1204   /* write matrix header */
1205   header[0] = MAT_FILE_CLASSID;
1206   header[1] = M; header[2] = N; header[3] = nz;
1207   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1208   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1209 
1210   /* fill in and store row lengths  */
1211   PetscCall(PetscMalloc1(m,&rowlens));
1212   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1213   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1214   PetscCall(PetscFree(rowlens));
1215 
1216   /* fill in and store column indices */
1217   PetscCall(PetscMalloc1(nz,&colidxs));
1218   for (cnt=0, i=0; i<m; i++) {
1219     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1220       if (garray[B->j[jb]] > cs) break;
1221       colidxs[cnt++] = garray[B->j[jb]];
1222     }
1223     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1224       colidxs[cnt++] = A->j[ja] + cs;
1225     for (; jb<B->i[i+1]; jb++)
1226       colidxs[cnt++] = garray[B->j[jb]];
1227   }
1228   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1229   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1230   PetscCall(PetscFree(colidxs));
1231 
1232   /* fill in and store nonzero values */
1233   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1234   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1235   PetscCall(PetscMalloc1(nz,&matvals));
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       matvals[cnt++] = ba[jb];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       matvals[cnt++] = aa[ja];
1243     for (; jb<B->i[i+1]; jb++)
1244       matvals[cnt++] = ba[jb];
1245   }
1246   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1247   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1248   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1249   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1250   PetscCall(PetscFree(matvals));
1251 
1252   /* write block size option to the viewer's .info file */
1253   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #include <petscdraw.h>
/*
  View the distributed matrix on ASCII, draw, binary, or socket viewers.
  Special ASCII formats (load balance, info, info-detail) are handled per-rank;
  everything else falls through to gathering the entire matrix onto rank 0
  and viewing it as a sequential matrix.  Collective on the matrix's comm.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of local sizes, nz counts, and inode use */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* any other ASCII format falls through to the gather-and-view code below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — any iascii viewer is consumed
       by the first `if (iascii)` branch above; candidate for removal */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/columns; other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1387 {
1388   PetscBool      iascii,isdraw,issocket,isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1395   if (iascii || isdraw || isbinary || issocket) {
1396     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for parallel AIJ matrices.

   Only the "local" sweep variants (and Eisenstat's trick) are supported in
   parallel: each outer iteration scatters the ghost values of xx, moves the
   off-process coupling to the right-hand side (bb1 = bb - B*x), and then runs
   the sequential SOR kernel of the diagonal block A on bb1.

   Input Parameters:
     matin  - the MPIAIJ matrix
     bb     - right-hand side
     omega  - relaxation factor
     flag   - MatSORType bit flags selecting the sweep variant
     fshift - diagonal shift
     its    - number of outer (parallel) iterations
     lits   - number of local iterations per outer iteration
   Output Parameter:
     xx     - the iterate (also read on entry unless SOR_ZERO_INITIAL_GUESS)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate directly to the diagonal block's kernel */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 (the locally modified rhs) is needed whenever xx is nonzero on entry,
     more than one outer iteration is requested, or Eisenstat's trick is used */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost update since x = 0 */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather the ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost update since x = 0 */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost update since x = 0 */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    /* backward local sweep starting from a zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal on first use; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D * xx */
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    /* add the off-process coupling to the modified rhs */
    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot style failure detected by the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1499 
/*
   MatPermute_MPIAIJ - Builds the row/column permuted matrix B from A.

   Star forests (PetscSF) are used to invert the row and column permutations in
   parallel, so every process learns the destination global index of each of its
   rows, columns, and ghost columns.  Exact diagonal/off-diagonal nonzero counts
   for the destination rows are then communicated so the result can be
   preallocated precisely before being filled with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  /* gcols holds the global indices of the ghost (off-diagonal) columns; fetch
     their permuted destinations from the owning processes */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, per local row, how many entries land in the diagonal (dnnz) versus
     off-diagonal (onnz) block of the destination row's owner */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Send the counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     appears to be dead code — possibly a remnant of earlier handling of a
     parallel colp; confirm before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1605 
1606 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1607 {
1608   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscLogDouble isend[5],irecv[5];
1621 
1622   PetscFunctionBegin;
1623   info->block_size = 1.0;
1624   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1625 
1626   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1627   isend[3] = info->memory;  isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1630 
1631   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1632   isend[3] += info->memory;  isend[4] += info->mallocs;
1633   if (flag == MAT_LOCAL) {
1634     info->nz_used      = isend[0];
1635     info->nz_allocated = isend[1];
1636     info->nz_unneeded  = isend[2];
1637     info->memory       = isend[3];
1638     info->mallocs      = isend[4];
1639   } else if (flag == MAT_GLOBAL_MAX) {
1640     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1641 
1642     info->nz_used      = irecv[0];
1643     info->nz_allocated = irecv[1];
1644     info->nz_unneeded  = irecv[2];
1645     info->memory       = irecv[3];
1646     info->mallocs      = irecv[4];
1647   } else if (flag == MAT_GLOBAL_SUM) {
1648     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1649 
1650     info->nz_used      = irecv[0];
1651     info->nz_allocated = irecv[1];
1652     info->nz_unneeded  = irecv[2];
1653     info->memory       = irecv[3];
1654     info->mallocs      = irecv[4];
1655   }
1656   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1657   info->fill_ratio_needed = 0;
1658   info->factor_mallocs    = 0;
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1663 {
1664   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1665 
1666   PetscFunctionBegin;
1667   switch (op) {
1668   case MAT_NEW_NONZERO_LOCATIONS:
1669   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1670   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1671   case MAT_KEEP_NONZERO_PATTERN:
1672   case MAT_NEW_NONZERO_LOCATION_ERR:
1673   case MAT_USE_INODES:
1674   case MAT_IGNORE_ZERO_ENTRIES:
1675   case MAT_FORM_EXPLICIT_TRANSPOSE:
1676     MatCheckPreallocated(A,1);
1677     PetscCall(MatSetOption(a->A,op,flg));
1678     PetscCall(MatSetOption(a->B,op,flg));
1679     break;
1680   case MAT_ROW_ORIENTED:
1681     MatCheckPreallocated(A,1);
1682     a->roworiented = flg;
1683 
1684     PetscCall(MatSetOption(a->A,op,flg));
1685     PetscCall(MatSetOption(a->B,op,flg));
1686     break;
1687   case MAT_FORCE_DIAGONAL_ENTRIES:
1688   case MAT_SORTED_FULL:
1689     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1690     break;
1691   case MAT_IGNORE_OFF_PROC_ENTRIES:
1692     a->donotstash = flg;
1693     break;
1694   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1695   case MAT_SPD:
1696   case MAT_SYMMETRIC:
1697   case MAT_STRUCTURALLY_SYMMETRIC:
1698   case MAT_HERMITIAN:
1699   case MAT_SYMMETRY_ETERNAL:
1700     break;
1701   case MAT_SUBMAT_SINGLEIS:
1702     A->submat_singleis = flg;
1703     break;
1704   case MAT_STRUCTURE_ONLY:
1705     /* The option is handled directly by MatSetOption() */
1706     break;
1707   default:
1708     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row with global column indices
   in increasing order, by merging the row of the diagonal block A with the row
   of the off-diagonal block B (B's local columns are translated to global
   indices through mat->garray).

   Only rows in [rstart,rend) owned by this process may be requested, and only
   one row may be "gotten" at a time (guarded by mat->getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      /* total row length = diagonal-block entries + off-diagonal-block entries */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request the work arrays from the blocks that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* layout: [B columns left of the diagonal block | all A columns | B columns right of it];
         imark is the split point within B's row */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  /* release the blocks' work arrays; the merged copies live in mat->rowvalues/rowindices */
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1789 
1790 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1791 {
1792   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1796   aij->getrowactive = PETSC_FALSE;
1797   PetscFunctionReturn(0);
1798 }
1799 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or infinity
   (max row sum) norm of a parallel AIJ matrix by combining the contributions
   of the diagonal (A) and off-diagonal (B) blocks and reducing across ranks.
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take the root */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (tmp is of global width), reduce, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal-block column indices are local; shift by cstart to get global */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal-block columns map to global through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row-sum max plus one reduce suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1869 
/*
   MatTranspose_MPIAIJ - Forms B = A^T.

   For MAT_INITIAL_MATRIX (or in-place, *matout == A) the result is first
   preallocated: diagonal-block counts come from a local column count of A's
   diagonal part, off-diagonal counts are obtained by reducing the counts of
   A's ghost columns back to their owners with a PetscSF.  The diagonal block
   is then transposed directly (all local), while the off-diagonal block is
   inserted through MatSetValues() since its entries change owners.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    /* sum each ghost column's count onto the process that owns that column */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has A's sizes transposed: local (na x ma), global (N x M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    /* reuse requires an identical nonzero pattern; make violations an error */
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  /* each entry (row i, ghost column bj[k]) of A's off-diagonal block becomes
     entry (garray[bj[k]], rstart+i) of B; insert one source row at a time */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's and destroy the shell */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1957 
1958 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1959 {
1960   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1961   Mat            a    = aij->A,b = aij->B;
1962   PetscInt       s1,s2,s3;
1963 
1964   PetscFunctionBegin;
1965   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1966   if (rr) {
1967     PetscCall(VecGetLocalSize(rr,&s1));
1968     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1969     /* Overlap communication with computation. */
1970     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1971   }
1972   if (ll) {
1973     PetscCall(VecGetLocalSize(ll,&s1));
1974     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1975     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1976   }
1977   /* scale  the diagonal block */
1978   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1979 
1980   if (rr) {
1981     /* Do a scatter end and then right scale the off-diagonal block */
1982     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1983     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1989 {
1990   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1991 
1992   PetscFunctionBegin;
1993   PetscCall(MatSetUnfactored(a->A));
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1998 {
1999   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2000   Mat            a,b,c,d;
2001   PetscBool      flg;
2002 
2003   PetscFunctionBegin;
2004   a = matA->A; b = matA->B;
2005   c = matB->A; d = matB->B;
2006 
2007   PetscCall(MatEqual(a,c,&flg));
2008   if (flg) {
2009     PetscCall(MatEqual(b,d,&flg));
2010   }
2011   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2016 {
2017   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2018   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2019 
2020   PetscFunctionBegin;
2021   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2022   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2023     /* because of the column compression in the off-processor part of the matrix a->B,
2024        the number of columns in a->B and b->B may be different, hence we cannot call
2025        the MatCopy() directly on the two parts. If need be, we can provide a more
2026        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2027        then copying the submatrices */
2028     PetscCall(MatCopy_Basic(A,B,str));
2029   } else {
2030     PetscCall(MatCopy(a->A,b->A,str));
2031     PetscCall(MatCopy(a->B,b->B,str));
2032   }
2033   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2034   PetscFunctionReturn(0);
2035 }
2036 
/* Default setup: preallocate both blocks using PETSc's default per-row nonzero estimates */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2043 
2044 /*
2045    Computes the number of nonzeros per row needed for preallocation when X and Y
2046    have different nonzero structure.
2047 */
2048 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2049 {
2050   PetscInt       i,j,k,nzx,nzy;
2051 
2052   PetscFunctionBegin;
2053   /* Set the number of nonzeros in the new matrix */
2054   for (i=0; i<m; i++) {
2055     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2056     nzx = xi[i+1] - xi[i];
2057     nzy = yi[i+1] - yi[i];
2058     nnz[i] = 0;
2059     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2060       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2061       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2062       nnz[i]++;
2063     }
2064     for (; k<nzy; k++) nnz[i]++;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2070 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2071 {
2072   PetscInt       m = Y->rmap->N;
2073   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2074   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2075 
2076   PetscFunctionBegin;
2077   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2082 {
2083   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2084 
2085   PetscFunctionBegin;
2086   if (str == SAME_NONZERO_PATTERN) {
2087     PetscCall(MatAXPY(yy->A,a,xx->A,str));
2088     PetscCall(MatAXPY(yy->B,a,xx->B,str));
2089   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2090     PetscCall(MatAXPY_Basic(Y,a,X,str));
2091   } else {
2092     Mat      B;
2093     PetscInt *nnz_d,*nnz_o;
2094 
2095     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
2096     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
2097     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2098     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2099     PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
2100     PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
2101     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
2102     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
2103     PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
2104     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2105     PetscCall(MatHeaderMerge(Y,&B));
2106     PetscCall(PetscFree(nnz_d));
2107     PetscCall(PetscFree(nnz_o));
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2113 
2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2115 {
2116   PetscFunctionBegin;
2117   if (PetscDefined(USE_COMPLEX)) {
2118     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2119 
2120     PetscCall(MatConjugate_SeqAIJ(aij->A));
2121     PetscCall(MatConjugate_SeqAIJ(aij->B));
2122   }
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2127 {
2128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2129 
2130   PetscFunctionBegin;
2131   PetscCall(MatRealPart(a->A));
2132   PetscCall(MatRealPart(a->B));
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2137 {
2138   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2139 
2140   PetscFunctionBegin;
2141   PetscCall(MatImaginaryPart(a->A));
2142   PetscCall(MatImaginaryPart(a->B));
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2147 {
2148   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2149   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2150   PetscScalar       *va,*vv;
2151   Vec               vB,vA;
2152   const PetscScalar *vb;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2156   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2157 
2158   PetscCall(VecGetArrayWrite(vA,&va));
2159   if (idx) {
2160     for (i=0; i<m; i++) {
2161       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2162     }
2163   }
2164 
2165   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2166   PetscCall(PetscMalloc1(m,&idxb));
2167   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2168 
2169   PetscCall(VecGetArrayWrite(v,&vv));
2170   PetscCall(VecGetArrayRead(vB,&vb));
2171   for (i=0; i<m; i++) {
2172     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2173       vv[i] = vb[i];
2174       if (idx) idx[i] = a->garray[idxb[i]];
2175     } else {
2176       vv[i] = va[i];
2177       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2178         idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(vA,&vv));
2182   PetscCall(VecRestoreArrayWrite(vA,&va));
2183   PetscCall(VecRestoreArrayRead(vB,&vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(0);
2188 }
2189 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, compute the entry with the smallest
  absolute value and (optionally) its global column index.

  The result is merged from the local diagonal block (mat->A) and the off-diagonal
  block (mat->B).  B stores only explicit nonzeros over a compressed set of global
  columns (mat->garray), so any B row that is not completely dense contains an
  implicit 0.0; for min-abs that implicit zero is always the off-diagonal winner,
  and the code searches for the global column of the FIRST implicit zero so that
  ties are broken by the smallest column index.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - vector (length = local rows) receiving the min |value| of each row
- idx - optional array receiving the global column index of the chosen entry
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;      /* maps B's compressed column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    /* wrap v's own storage so the sequential routine writes straight into it */
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every local row is entirely implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal min |value| is exactly 0.0 */
      offdiagA[r] = 0.0;

      /* Find the global column of the first implicit 0.0, i.e. the first hole in the cmap.
         NOTE(review): j counts B's compressed columns while cstart/n are global column
         quantities; comparing them directly looks suspect -- confirm against cmap's definition. */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* no stored entries at all: first hole is column 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1; /* hole immediately after the last stored column */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row, keeping whichever has the smaller |value| */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2297 
/*
  MatGetRowMin_MPIAIJ - For each local row, compute the minimum entry (compared by
  real part) and (optionally) its global column index.

  The result is merged from the local diagonal block (mat->A) and the off-diagonal
  block (mat->B).  B stores only explicit nonzeros over a compressed set of global
  columns (mat->garray), so a B row that is not completely dense contains an
  implicit 0.0; the off-diagonal minimum is therefore 0.0 or lower, and the code
  searches for the global column of the FIRST implicit zero so that ties are
  broken by the smallest column index.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - vector (length = local rows) receiving the row minima
- idx - optional array receiving the global column index of the chosen entry
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;      /* maps B's compressed column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    /* wrap v's own storage so the sequential routine writes straight into it */
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: rows are empty, report the identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the off-diagonal minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find the global column of the first implicit 0.0, i.e. the first hole in the cmap.
         NOTE(review): j counts B's compressed columns while cstart/n are global column
         quantities; comparing them directly looks suspect -- confirm against cmap's definition. */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* no stored entries at all: first hole is column 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1; /* hole immediately after the last stored column */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row, keeping whichever has the smaller real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2405 
/*
  MatGetRowMax_MPIAIJ - For each local row, compute the maximum entry (compared by
  real part) and (optionally) its global column index.

  The result is merged from the local diagonal block (mat->A) and the off-diagonal
  block (mat->B).  B stores only explicit nonzeros over a compressed set of global
  columns (mat->garray), so a B row that is not completely dense contains an
  implicit 0.0; the off-diagonal maximum is therefore 0.0 or higher, and the code
  searches for the global column of the FIRST implicit zero so that ties are
  broken by the smallest column index.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - vector (length = local rows) receiving the row maxima
- idx - optional array receiving the global column index of the chosen entry
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;      /* maps B's compressed column index -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    /* wrap v's own storage so the sequential routine writes straight into it */
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: rows are empty, report the identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zero, seed with the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher (an implicit 0.0 exists) */
      offdiagA[r] = 0.0;

      /* Find the global column of the first implicit 0.0, i.e. the first hole in the cmap.
         NOTE(review): j counts B's compressed columns while cstart/n are global column
         quantities; comparing them directly looks suspect -- confirm against cmap's definition. */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) { /* no stored entries at all: first hole is column 0 or just past the diagonal block */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1; /* hole immediately after the last stored column */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored entries of this B row, keeping whichever has the larger real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block winners; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2513 
2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2515 {
2516   Mat            *dummy;
2517 
2518   PetscFunctionBegin;
2519   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2520   *newmat = *dummy;
2521   PetscCall(PetscFree(dummy));
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2526 {
2527   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2528 
2529   PetscFunctionBegin;
2530   PetscCall(MatInvertBlockDiagonal(a->A,values));
2531   A->factorerrortype = a->A->factorerrortype;
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2536 {
2537   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2538 
2539   PetscFunctionBegin;
2540   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2541   PetscCall(MatSetRandom(aij->A,rctx));
2542   if (x->assembled) {
2543     PetscCall(MatSetRandom(aij->B,rctx));
2544   } else {
2545     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2546   }
2547   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2548   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2553 {
2554   PetscFunctionBegin;
2555   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2556   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2557   PetscFunctionReturn(0);
2558 }
2559 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix registered one;
     silently a no-op for matrix types without this method */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2578 
2579 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2580 {
2581   PetscBool            sc = PETSC_FALSE,flg;
2582 
2583   PetscFunctionBegin;
2584   PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
2585   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2586   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
2587   if (flg) {
2588     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
2589   }
2590   PetscOptionsHeadEnd();
2591   PetscFunctionReturn(0);
2592 }
2593 
/* Shift Y by a*I.  Before delegating to the generic implementation, make sure the
   local diagonal block has room to receive the diagonal entries. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one diagonal entry per row */
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    /* preallocated but the diagonal block holds no nonzeros: re-preallocate it,
       restoring the user's "no new nonzeros" setting which the call resets */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}
2610 
2611 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2612 {
2613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2614 
2615   PetscFunctionBegin;
2616   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2617   PetscCall(MatMissingDiagonal(a->A,missing,d));
2618   if (d) {
2619     PetscInt rstart;
2620     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2621     *d += rstart;
2622 
2623   }
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2628 {
2629   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2630 
2631   PetscFunctionBegin;
2632   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ.  Entries are positional; the numeric
   markers name the slot index within struct _MatOps, and NULL marks operations
   not implemented for this matrix type.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2786 
2787 /* ----------------------------------------------------------------------------------------*/
2788 
2789 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2790 {
2791   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2792 
2793   PetscFunctionBegin;
2794   PetscCall(MatStoreValues(aij->A));
2795   PetscCall(MatStoreValues(aij->B));
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2800 {
2801   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2802 
2803   PetscFunctionBegin;
2804   PetscCall(MatRetrieveValues(aij->A));
2805   PetscCall(MatRetrieveValues(aij->B));
2806   PetscFunctionReturn(0);
2807 }
2808 
/* Preallocate the MPIAIJ matrix: d_nz/d_nnz size the local diagonal block,
   o_nz/o_nnz the off-diagonal block.  Any previous communication structures
   (column map, ghost-column array, local work vector, scatter) are discarded
   because they are invalidated by new preallocation. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the stale global-to-local column map */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process everything lives in the diagonal block, so B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes, so it is only created on first preallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2852 
/* Reset the matrix to its just-preallocated state: discard communication
   structures derived from the previous assembly and reset both local blocks,
   keeping their existing preallocation pattern. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard the stale global-to-local column map */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2879 
/* Duplicate an MPIAIJ matrix.  cpvalues controls whether numerical values are
   copied (passed through to the sequential MatDuplicate of both blocks); all
   structural/administrative state (layouts, column map, ghost columns, scatter)
   is copied or re-created as needed. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  /* copy the administrative flags of the original */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state is not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* copy the global-to-local column map, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the ghost-column (compressed B column -> global column) array, if it exists */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  /* carry over composed methods (e.g. the *_C function pointers) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2945 
2946 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2947 {
2948   PetscBool      isbinary, ishdf5;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2952   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2953   /* force binary viewer to load .info file if it has not yet done so */
2954   PetscCall(PetscViewerSetUp(viewer));
2955   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2956   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2957   if (isbinary) {
2958     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
2959   } else if (ishdf5) {
2960 #if defined(PETSC_HAVE_HDF5)
2961     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
2962 #else
2963     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2964 #endif
2965   } else {
2966     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2967   }
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2972 {
2973   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2974   PetscInt       *rowidxs,*colidxs;
2975   PetscScalar    *matvals;
2976 
2977   PetscFunctionBegin;
2978   PetscCall(PetscViewerSetUp(viewer));
2979 
2980   /* read in matrix header */
2981   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
2982   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2983   M  = header[1]; N = header[2]; nz = header[3];
2984   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
2985   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
2986   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
2987 
2988   /* set block sizes from the viewer's .info file */
2989   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
2990   /* set global sizes if not set already */
2991   if (mat->rmap->N < 0) mat->rmap->N = M;
2992   if (mat->cmap->N < 0) mat->cmap->N = N;
2993   PetscCall(PetscLayoutSetUp(mat->rmap));
2994   PetscCall(PetscLayoutSetUp(mat->cmap));
2995 
2996   /* check if the matrix sizes are correct */
2997   PetscCall(MatGetSize(mat,&rows,&cols));
2998   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
2999 
3000   /* read in row lengths and build row indices */
3001   PetscCall(MatGetLocalSize(mat,&m,NULL));
3002   PetscCall(PetscMalloc1(m+1,&rowidxs));
3003   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3004   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3005   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3006   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3007   /* read in column indices and matrix values */
3008   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3009   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3010   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3011   /* store matrix indices and values */
3012   PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
3013   PetscCall(PetscFree(rowidxs));
3014   PetscCall(PetscFree2(colidxs,matvals));
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 /* Not scalable because of ISAllGather() unless getting all columns. */
3019 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3020 {
3021   IS             iscol_local;
3022   PetscBool      isstride;
3023   PetscMPIInt    lisstride=0,gisstride;
3024 
3025   PetscFunctionBegin;
3026   /* check if we are grabbing all columns*/
3027   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3028 
3029   if (isstride) {
3030     PetscInt  start,len,mstart,mlen;
3031     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3032     PetscCall(ISGetLocalSize(iscol,&len));
3033     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3034     if (mstart == start && mlen-mstart == len) lisstride = 1;
3035   }
3036 
3037   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3038   if (gisstride) {
3039     PetscInt N;
3040     PetscCall(MatGetSize(mat,NULL,&N));
3041     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3042     PetscCall(ISSetIdentity(iscol_local));
3043     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3044   } else {
3045     PetscInt cbs;
3046     PetscCall(ISGetBlockSize(iscol,&cbs));
3047     PetscCall(ISAllGather(iscol,&iscol_local));
3048     PetscCall(ISSetBlockSize(iscol_local,cbs));
3049   }
3050 
3051   *isseq = iscol_local;
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /*
3056  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3057  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3058 
3059  Input Parameters:
3060    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3063    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3064            i.e., mat->cstart <= iscol[i] < mat->cend
3065  Output Parameter:
3066    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3067    iscol_o - sequential column index set for retrieving mat->B
3068    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3069  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     -1 marks unselected columns, so after scattering any value > -1 identifies a selected one */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: isstart = exclusive prefix sum of local iscol sizes,
     i.e. the global offset of this process's chunk of iscol */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d; idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i)); /* i is reused here to hold the block size */
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d: shift the global row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices; idx/cmap1 are sized Bn but only count entries are filled */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* column i of B was selected by some rank's iscol */
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller, who must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3166 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat
       (they were composed onto *submat by the MAT_INITIAL_MATRIX branch below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) { /* an empty iscol_o means there is no off-diagonal part to refresh */
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M (takes ownership of Asub; Bsub is destroyed inside) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* keep only the entries of iscol_o whose global column survived in asub->garray;
         both subgarray and garray are scanned in increasing order */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    /* garray was allocated by ISGetSeqIS_SameColDist_Private() and is no longer needed */
    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3261 
/* Dispatcher for submatrix extraction on MPIAIJ: when isrow/iscol match mat's
   row/column distribution a specialized (more scalable) routine is used,
   otherwise we fall back to the nonscalable gather-based implementation.
   The branch taken must be the same on all ranks, hence the collective
   allreduce below. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, the composed objects on *newmat record which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices must fall inside this rank's ownership range */
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the distribution properties must hold on every rank (logical AND) */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* if unsorted, fall through to the general path below (iscol_local is reused there) */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3365 
3366 /*@C
3367      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3368          and "off-diagonal" part of the matrix in CSR format.
3369 
3370    Collective
3371 
3372    Input Parameters:
3373 +  comm - MPI communicator
3374 .  A - "diagonal" portion of matrix
3375 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3376 -  garray - global index of B columns
3377 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3381 
3382    Notes:
3383        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3384        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3385 
3386 .seealso: MatCreateMPIAIJWithSplitArrays()
3387 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat (sum of the local diagonal-block widths) */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global indices via garray, in place */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares the i/j/a arrays of B (no copy) */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* hand ownership of the shared arrays from B over to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3454 
3455 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3456 
/* isrow has the same processor distribution as mat; iscol_local is iscol gathered
   onto each process. iscol_local may be NULL when call == MAT_REUSE_MATRIX, in which
   case the "SubIScol"/"Subcmap"/"SubMatrix" objects composed on *newmat are reused. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the objects saved by the MAT_INITIAL_MATRIX call below */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: walk garray (sorted) in step with is_idx */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens)); /* single allocation: dlens then olens */
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* a column is "diagonal" iff it maps into [rstart,rend) of the new matrix */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat, row by row, translating columns through cmap */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz; /* jj and aa are running cursors over Msub's CSR data */
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}
3665 
3666 /*
3667     Not great since it makes two copies of the submatrix, first an SeqAIJ
3668   in local and then by concatenating the local matrices the end result.
3669   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3670 
3671   Note: This requires a sequential iscol with all indices.
3672 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns fast path must be taken collectively, so AND the flag across the communicator */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  /* (1) extract the requested rows/columns into a local sequential matrix Mreuse;
     on reuse, recover the one stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
  if (call ==  MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum: rend is one past this rank's last owned column, rstart its first */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    /* single allocation: dlens is the first m entries, olens aliases the next m */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks the CSR column indices of Mreuse across all rows */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* (2) copy the values of Mreuse into M row by row, using the private fast path */
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the start of the array by the loop above;
     presumably MatSeqAIJRestoreArrayRead() tolerates this — confirm against its contract */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3797 
3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3799 {
3800   PetscInt       m,cstart, cend,j,nnz,i,d;
3801   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3802   const PetscInt *JJ;
3803   PetscBool      nooffprocentries;
3804 
3805   PetscFunctionBegin;
3806   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3807 
3808   PetscCall(PetscLayoutSetUp(B->rmap));
3809   PetscCall(PetscLayoutSetUp(B->cmap));
3810   m      = B->rmap->n;
3811   cstart = B->cmap->rstart;
3812   cend   = B->cmap->rend;
3813   rstart = B->rmap->rstart;
3814 
3815   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3816 
3817   if (PetscDefined(USE_DEBUG)) {
3818     for (i=0; i<m; i++) {
3819       nnz = Ii[i+1]- Ii[i];
3820       JJ  = J + Ii[i];
3821       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3822       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3823       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3824     }
3825   }
3826 
3827   for (i=0; i<m; i++) {
3828     nnz     = Ii[i+1]- Ii[i];
3829     JJ      = J + Ii[i];
3830     nnz_max = PetscMax(nnz_max,nnz);
3831     d       = 0;
3832     for (j=0; j<nnz; j++) {
3833       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3834     }
3835     d_nnz[i] = d;
3836     o_nnz[i] = nnz - d;
3837   }
3838   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3839   PetscCall(PetscFree2(d_nnz,o_nnz));
3840 
3841   for (i=0; i<m; i++) {
3842     ii   = i + rstart;
3843     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3844   }
3845   nooffprocentries    = B->nooffprocentries;
3846   B->nooffprocentries = PETSC_TRUE;
3847   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3848   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3849   B->nooffprocentries = nooffprocentries;
3850 
3851   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3875 
3876        The format which is used for the sparse matrix input, is equivalent to a
3877     row-major ordering.. i.e for the following matrix, the input data expected is
3878     as shown
3879 
3880 $        1 0 0
3881 $        2 0 3     P0
3882 $       -------
3883 $        4 5 6     P1
3884 $
3885 $     Process0 [P0]: rows_owned=[0,1]
3886 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3887 $        j =  {0,0,2}  [size = 3]
3888 $        v =  {1,2,3}  [size = 3]
3889 $
3890 $     Process1 [P1]: rows_owned=[2]
3891 $        i =  {0,3}    [size = nrow+1  = 1+1]
3892 $        j =  {0,1,2}  [size = 3]
3893 $        v =  {4,5,6}  [size = 3]
3894 
3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3896           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3897 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the implementation registered under "MatMPIAIJSetPreallocationCSR_C"
     for B's type (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod is a
     silent no-op when the type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3904 
3905 /*@C
3906    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3907    (the default parallel PETSc format).  For good matrix assembly performance
3908    the user should preallocate the matrix storage by setting the parameters
3909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3910    performance can be increased by more than a factor of 50.
3911 
3912    Collective
3913 
3914    Input Parameters:
3915 +  B - the matrix
3916 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3917            (same value is used for all local rows)
3918 .  d_nnz - array containing the number of nonzeros in the various rows of the
3919            DIAGONAL portion of the local submatrix (possibly different for each row)
3920            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3921            The size of this array is equal to the number of local rows, i.e 'm'.
3922            For matrices that will be factored, you must leave room for (and set)
3923            the diagonal entry even if it is zero.
3924 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3925            submatrix (same value is used for all local rows).
3926 -  o_nnz - array containing the number of nonzeros in the various rows of the
3927            OFF-DIAGONAL portion of the local submatrix (possibly different for
3928            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3929            structure. The size of this array is equal to the number
3930            of local rows, i.e 'm'.
3931 
3932    If the *_nnz parameter is given then the *_nz parameter is ignored
3933 
3934    The AIJ format (also called the Yale sparse matrix format or
3935    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3936    storage.  The stored row and column indices begin with zero.
3937    See Users-Manual: ch_mat for details.
3938 
3939    The parallel matrix is partitioned such that the first m0 rows belong to
3940    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3941    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3942 
3943    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
3945    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3946    first row that belongs to the processor, r2 is the last row belonging to
3947    the this processor, and c1-c2 is range of indices of the local part of a
3948    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3949    common case of a square matrix, the row and column ranges are the same and
3950    the DIAGONAL part is also square. The remaining portion of the local
3951    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3952 
3953    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3954 
3955    You can call MatGetInfo() to get information on how effective the preallocation was;
3956    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3957    You can also run with the option -info and look for messages with the string
3958    malloc in them to see if additional memory allocation was needed.
3959 
3960    Example usage:
3961 
3962    Consider the following 8x8 matrix with 34 non-zero values, that is
3963    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3964    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3965    as follows:
3966 
3967 .vb
3968             1  2  0  |  0  3  0  |  0  4
3969     Proc0   0  5  6  |  7  0  0  |  8  0
3970             9  0 10  | 11  0  0  | 12  0
3971     -------------------------------------
3972            13  0 14  | 15 16 17  |  0  0
3973     Proc1   0 18  0  | 19 20 21  |  0  0
3974             0  0  0  | 22 23  0  | 24  0
3975     -------------------------------------
3976     Proc2  25 26 27  |  0  0 28  | 29  0
3977            30  0  0  | 31 32 33  |  0 34
3978 .ve
3979 
3980    This can be represented as a collection of submatrices as:
3981 
3982 .vb
3983       A B C
3984       D E F
3985       G H I
3986 .ve
3987 
3988    Where the submatrices A,B,C are owned by proc0, D,E,F are
3989    owned by proc1, G,H,I are owned by proc2.
3990 
3991    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3992    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3993    The 'M','N' parameters are 8,8, and have the same values on all procs.
3994 
3995    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3996    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3997    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3998    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3999    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4001 
4002    When d_nz, o_nz parameters are specified, d_nz storage elements are
4003    allocated for every row of the local diagonal submatrix, and o_nz
4004    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4006    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4007    In this case, the values of d_nz,o_nz are:
4008 .vb
4009      proc0 : dnz = 2, o_nz = 2
4010      proc1 : dnz = 3, o_nz = 2
4011      proc2 : dnz = 1, o_nz = 4
4012 .ve
4013    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4014    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4016    34 values.
4017 
4018    When d_nnz, o_nnz parameters are specified, the storage is specified
4019    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4020    In the above case the values for d_nnz,o_nnz are:
4021 .vb
4022      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4023      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4024      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4025 .ve
4026    Here the space allocated is sum of all the above values i.e 34, and
4027    hence pre-allocation is perfect.
4028 
4029    Level: intermediate
4030 
4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4032           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4033 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate that B is a Mat with its type already set before dispatching */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* forward to the type-specific preallocation routine if the type registered one;
     silently does nothing otherwise (PetscTryMethod semantics) */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4042 
4043 /*@
4044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4045          CSR format for the local rows.
4046 
4047    Collective
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (Cannot be PETSC_DECIDE)
4052 .  n - This value should be the same as the local size used in creating the
4053        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4054        calculated if N is given) For square matrices n is almost always m.
4055 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4056 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4057 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4058 .   j - column indices
4059 -   a - matrix values
4060 
4061    Output Parameter:
4062 .   mat - the matrix
4063 
4064    Level: intermediate
4065 
4066    Notes:
4067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4070 
4071        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4072 
4073        The format which is used for the sparse matrix input, is equivalent to a
4074     row-major ordering.. i.e for the following matrix, the input data expected is
4075     as shown
4076 
4077        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4078 
4079 $        1 0 0
4080 $        2 0 3     P0
4081 $       -------
4082 $        4 5 6     P1
4083 $
4084 $     Process0 [P0]: rows_owned=[0,1]
4085 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4086 $        j =  {0,0,2}  [size = 3]
4087 $        v =  {1,2,3}  [size = 3]
4088 $
4089 $     Process1 [P1]: rows_owned=[2]
4090 $        i =  {0,3}    [size = nrow+1  = 1+1]
4091 $        j =  {0,1,2}  [size = 3]
4092 $        v =  {4,5,6}  [size = 3]
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4096 @*/
4097 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4098 {
4099   PetscFunctionBegin;
4100   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4101   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4102   PetscCall(MatCreate(comm,mat));
4103   PetscCall(MatSetSizes(*mat,m,n,M,N));
4104   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4105   PetscCall(MatSetType(*mat,MATMPIAIJ));
4106   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4107   PetscFunctionReturn(0);
4108 }
4109 
4110 /*@
4111      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4112          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4113 
4114    Collective
4115 
4116    Input Parameters:
4117 +  mat - the matrix
4118 .  m - number of local rows (Cannot be PETSC_DECIDE)
4119 .  n - This value should be the same as the local size used in creating the
4120        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4121        calculated if N is given) For square matrices n is almost always m.
4122 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4123 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4124 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4125 .  J - column indices
4126 -  v - matrix values
4127 
4128    Level: intermediate
4129 
4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4131           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4132 @*/
4133 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4134 {
4135   PetscInt       cstart,nnz,i,j;
4136   PetscInt       *ld;
4137   PetscBool      nooffprocentries;
4138   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4139   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4140   PetscScalar    *ad,*ao;
4141   const PetscInt *Adi = Ad->i;
4142   PetscInt       ldi,Iii,md;
4143 
4144   PetscFunctionBegin;
4145   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4146   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4147   PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4148   PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4149 
4150   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4151   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4152   cstart = mat->cmap->rstart;
4153   if (!Aij->ld) {
4154     /* count number of entries below block diagonal */
4155     PetscCall(PetscCalloc1(m,&ld));
4156     Aij->ld = ld;
4157     for (i=0; i<m; i++) {
4158       nnz  = Ii[i+1]- Ii[i];
4159       j     = 0;
4160       while  (J[j] < cstart && j < nnz) {j++;}
4161       J    += nnz;
4162       ld[i] = j;
4163     }
4164   } else {
4165     ld = Aij->ld;
4166   }
4167 
4168   for (i=0; i<m; i++) {
4169     nnz  = Ii[i+1]- Ii[i];
4170     Iii  = Ii[i];
4171     ldi  = ld[i];
4172     md   = Adi[i+1]-Adi[i];
4173     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4174     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4175     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4176     ad  += md;
4177     ao  += nnz - md;
4178   }
4179   nooffprocentries      = mat->nooffprocentries;
4180   mat->nooffprocentries = PETSC_TRUE;
4181   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4182   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4183   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4184   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4185   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4186   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4187   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4188   mat->nooffprocentries = nooffprocentries;
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 /*@C
4193    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4194    (the default parallel PETSc format).  For good matrix assembly performance
4195    the user should preallocate the matrix storage by setting the parameters
4196    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4197    performance can be increased by more than a factor of 50.
4198 
4199    Collective
4200 
4201    Input Parameters:
4202 +  comm - MPI communicator
4203 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4204            This value should be the same as the local size used in creating the
4205            y vector for the matrix-vector product y = Ax.
4206 .  n - This value should be the same as the local size used in creating the
4207        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4208        calculated if N is given) For square matrices n is almost always m.
4209 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4210 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4211 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4212            (same value is used for all local rows)
4213 .  d_nnz - array containing the number of nonzeros in the various rows of the
4214            DIAGONAL portion of the local submatrix (possibly different for each row)
4215            or NULL, if d_nz is used to specify the nonzero structure.
4216            The size of this array is equal to the number of local rows, i.e 'm'.
4217 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4218            submatrix (same value is used for all local rows).
4219 -  o_nnz - array containing the number of nonzeros in the various rows of the
4220            OFF-DIAGONAL portion of the local submatrix (possibly different for
4221            each row) or NULL, if o_nz is used to specify the nonzero
4222            structure. The size of this array is equal to the number
4223            of local rows, i.e 'm'.
4224 
4225    Output Parameter:
4226 .  A - the matrix
4227 
4228    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4229    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4230    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4231 
4232    Notes:
4233    If the *_nnz parameter is given then the *_nz parameter is ignored
4234 
4235    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4236    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4237    storage requirements for this matrix.
4238 
4239    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4240    processor than it must be used on all processors that share the object for
4241    that argument.
4242 
4243    The user MUST specify either the local or global matrix dimensions
4244    (possibly both).
4245 
4246    The parallel matrix is partitioned across processors such that the
4247    first m0 rows belong to process 0, the next m1 rows belong to
4248    process 1, the next m2 rows belong to process 2 etc.. where
4249    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4250    values corresponding to [m x N] submatrix.
4251 
4252    The columns are logically partitioned with the n0 columns belonging
4253    to 0th partition, the next n1 columns belonging to the next
4254    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4255 
4256    The DIAGONAL portion of the local submatrix on any given processor
4257    is the submatrix corresponding to the rows and columns m,n
4258    corresponding to the given processor. i.e diagonal matrix on
4259    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4260    etc. The remaining portion of the local submatrix [m x (N-n)]
4261    constitute the OFF-DIAGONAL portion. The example below better
4262    illustrates this concept.
4263 
4264    For a square global matrix we define each processor's diagonal portion
4265    to be its local rows and the corresponding columns (a square submatrix);
4266    each processor's off-diagonal portion encompasses the remainder of the
4267    local matrix (a rectangular submatrix).
4268 
4269    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4270 
4271    When calling this routine with a single process communicator, a matrix of
4272    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4273    type of communicator, use the construction mechanism
4274 .vb
4275      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4276 .ve
4277 
4278 $     MatCreate(...,&A);
4279 $     MatSetType(A,MATMPIAIJ);
4280 $     MatSetSizes(A, m,n,M,N);
4281 $     MatMPIAIJSetPreallocation(A,...);
4282 
4283    By default, this format uses inodes (identical nodes) when possible.
4284    We search for consecutive rows with the same nonzero structure, thereby
4285    reusing matrix information to achieve increased efficiency.
4286 
4287    Options Database Keys:
4288 +  -mat_no_inode  - Do not use inodes
4289 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4290 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4291         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4292         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4293 
4294    Example usage:
4295 
4296    Consider the following 8x8 matrix with 34 non-zero values, that is
4297    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4298    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4299    as follows
4300 
4301 .vb
4302             1  2  0  |  0  3  0  |  0  4
4303     Proc0   0  5  6  |  7  0  0  |  8  0
4304             9  0 10  | 11  0  0  | 12  0
4305     -------------------------------------
4306            13  0 14  | 15 16 17  |  0  0
4307     Proc1   0 18  0  | 19 20 21  |  0  0
4308             0  0  0  | 22 23  0  | 24  0
4309     -------------------------------------
4310     Proc2  25 26 27  |  0  0 28  | 29  0
4311            30  0  0  | 31 32 33  |  0 34
4312 .ve
4313 
4314    This can be represented as a collection of submatrices as
4315 
4316 .vb
4317       A B C
4318       D E F
4319       G H I
4320 .ve
4321 
4322    Where the submatrices A,B,C are owned by proc0, D,E,F are
4323    owned by proc1, G,H,I are owned by proc2.
4324 
4325    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4326    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4327    The 'M','N' parameters are 8,8, and have the same values on all procs.
4328 
4329    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4330    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4331    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4332    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4333    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4334    matrix, and [DF] as another SeqAIJ matrix.
4335 
4336    When d_nz, o_nz parameters are specified, d_nz storage elements are
4337    allocated for every row of the local diagonal submatrix, and o_nz
4338    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4339    One way to choose d_nz and o_nz is to use the max nonzeros per local
4340    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4341    In this case, the values of d_nz,o_nz are
4342 .vb
4343      proc0 : dnz = 2, o_nz = 2
4344      proc1 : dnz = 3, o_nz = 2
4345      proc2 : dnz = 1, o_nz = 4
4346 .ve
4347    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4348    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4349    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4350    34 values.
4351 
4352    When d_nnz, o_nnz parameters are specified, the storage is specified
4353    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4354    In the above case the values for d_nnz,o_nnz are
4355 .vb
4356      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4357      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4358      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4359 .ve
4360    Here the space allocated is sum of all the above values i.e 34, and
4361    hence pre-allocation is perfect.
4362 
4363    Level: intermediate
4364 
4365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4366           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4367 @*/
4368 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4369 {
4370   PetscMPIInt    size;
4371 
4372   PetscFunctionBegin;
4373   PetscCall(MatCreate(comm,A));
4374   PetscCall(MatSetSizes(*A,m,n,M,N));
4375   PetscCallMPI(MPI_Comm_size(comm,&size));
4376   if (size > 1) {
4377     PetscCall(MatSetType(*A,MATMPIAIJ));
4378     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4379   } else {
4380     PetscCall(MatSetType(*A,MATSEQAIJ));
4381     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4382   }
4383   PetscFunctionReturn(0);
4384 }
4385 
4386 /*@C
4387   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4388 
4389   Not collective
4390 
4391   Input Parameter:
4392 . A - The MPIAIJ matrix
4393 
4394   Output Parameters:
4395 + Ad - The local diagonal block as a SeqAIJ matrix
4396 . Ao - The local off-diagonal block as a SeqAIJ matrix
4397 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4398 
4399   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4400   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4401   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4402   local column numbers to global column numbers in the original matrix.
4403 
4404   Level: intermediate
4405 
4406 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4407 @*/
4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4409 {
4410   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4411   PetscBool      flg;
4412 
4413   PetscFunctionBegin;
4414   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4415   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4416   if (Ad)     *Ad     = a->A;
4417   if (Ao)     *Ao     = a->B;
4418   if (colmap) *colmap = a->garray;
4419   PetscFunctionReturn(0);
4420 }
4421 
/*
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ - Stacks the sequential matrix of each rank
  of comm row-wise into one parallel matrix: this rank's rows of inmat become rows
  [rstart, rstart+m) of *outmat, where rstart is the prefix sum of the local row
  counts (computed with MPI_Scan below).

  With scall == MAT_INITIAL_MATRIX a symbolic phase sizes, types, and preallocates
  *outmat first; with MAT_REUSE_MATRIX only the numeric (value-insertion) phase runs.
  n is the desired local column count of *outmat, or PETSC_DECIDE.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));  /* inmat is sequential: m,N are its full dimensions */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first row of this rank's slice = sum of row counts of lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row to preallocate exactly */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocation variants are invoked; only the one matching the
       actual (Seq or MPI) type of *outmat takes effect, the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    /* every value below is inserted into locally owned rows */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;  /* global row index of local row i */
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4475 
4476 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4477 {
4478   PetscMPIInt       rank;
4479   PetscInt          m,N,i,rstart,nnz;
4480   size_t            len;
4481   const PetscInt    *indx;
4482   PetscViewer       out;
4483   char              *name;
4484   Mat               B;
4485   const PetscScalar *values;
4486 
4487   PetscFunctionBegin;
4488   PetscCall(MatGetLocalSize(A,&m,NULL));
4489   PetscCall(MatGetSize(A,NULL,&N));
4490   /* Should this be the type of the diagonal block of A? */
4491   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4492   PetscCall(MatSetSizes(B,m,N,m,N));
4493   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4494   PetscCall(MatSetType(B,MATSEQAIJ));
4495   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4496   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4497   for (i=0; i<m; i++) {
4498     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4499     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4500     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4501   }
4502   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4503   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4504 
4505   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4506   PetscCall(PetscStrlen(outfile,&len));
4507   PetscCall(PetscMalloc1(len+6,&name));
4508   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4509   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4510   PetscCall(PetscFree(name));
4511   PetscCall(MatView(B,out));
4512   PetscCall(PetscViewerDestroy(&out));
4513   PetscCall(MatDestroy(&B));
4514   PetscFunctionReturn(0);
4515 }
4516 
/*
  MatDestroy_MPIAIJ_SeqsToMPI - Container destructor for the Mat_Merge_SeqsToMPI
  state attached to a matrix by MatCreateMPIAIJSumSeqAIJSymbolic(); frees every
  array the merge struct owns and then the struct itself.
*/
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  /* message bookkeeping arrays */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  /* merged local ij structure */
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* received i/j buffers: slot 0 holds the contiguous payload, so it is
     freed before the arrays of pointers into it */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  /* optional off-process structure (set to NULL by the symbolic phase here) */
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4539 
4540 #include <../src/mat/utils/freespace.h>
4541 #include <petscbt.h>
4542 
/*
  MatCreateMPIAIJSumSeqAIJNumeric - Numeric phase of summing per-rank sequential
  AIJ matrices into the parallel matrix mpimat.  Requires the Mat_Merge_SeqsToMPI
  state composed on mpimat by MatCreateMPIAIJSumSeqAIJSymbolic().

  Each rank sends the values of seqmat rows owned by other ranks to their owners,
  then accumulates (sums) its local values plus all received contributions into
  the merged sparsity pattern (bi/bj) and inserts them into mpimat.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge state produced by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;      /* merged row pointers of the local part of mpimat */
  bj     = merge->bj;      /* merged column indices */
  buf_ri = merge->buf_ri;  /* received i-structures from the symbolic phase */
  buf_rj = merge->buf_rj;  /* received j-structures */

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values of all rows owned by [proc] are contiguous in aa starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));  /* scratch row of values; a row has at most N entries */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;  /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both aj and bj_i are sorted and aj is a subset of bj_i, so a single
       forward scan over bj_i places every local value */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] is the contiguous payload; free it before the pointer array */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4662 
/*
  MatCreateMPIAIJSumSeqAIJSymbolic - Symbolic phase of summing per-rank sequential
  AIJ matrices into one parallel matrix.  Each rank owns a contiguous block of rows
  (m rows, or PETSC_DECIDE); rows of seqmat falling outside the local block are
  sent to their owners, the sparsity patterns are merged with a sorted linked list,
  and a preallocated (but unassembled) MATMPIAIJ matrix is created.

  The Mat_Merge_SeqsToMPI bookkeeping (merged bi/bj, received i/j structures,
  message lengths) is composed on *mpimat in a container so that
  MatCreateMPIAIJSumSeqAIJNumeric() can fill in the values, repeatedly if desired.
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;      /* resolved local row count */
  owners = merge->rowmap->range;  /* owners[p]..owners[p+1] = rows owned by rank p */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;  /* nothing is sent to self */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with nonzeros are encoded in the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);  /* header + row indices + row offsets */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of all rows owned by [proc] are contiguous in aj */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {  /* skip empty rows; only nonzero rows were counted above */
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  /* flatten the free-space chunks into the contiguous merged column array bj */
  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  PetscCall(PetscCommDestroy(&comm));  /* drop the reference taken by PetscCommDuplicate above */

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4912 
4913 /*@C
4914       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4915                  matrices from each processor
4916 
4917     Collective
4918 
4919    Input Parameters:
4920 +    comm - the communicator the parallel matrix will live on
4921 .    seqmat - the input sequential matrix on each process
4922 .    m - number of local rows (or PETSC_DECIDE)
4923 .    n - number of local columns (or PETSC_DECIDE)
4924 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4925 
4926    Output Parameter:
4927 .    mpimat - the parallel matrix generated
4928 
4929     Level: advanced
4930 
4931    Notes:
4932      The dimensions of the sequential matrix in each processor MUST be the same.
4933      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4934      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4935 @*/
4936 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4937 {
4938   PetscMPIInt    size;
4939 
4940   PetscFunctionBegin;
4941   PetscCallMPI(MPI_Comm_size(comm,&size));
4942   if (size == 1) {
4943     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4944     if (scall == MAT_INITIAL_MATRIX) {
4945       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
4946     } else {
4947       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
4948     }
4949     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4950     PetscFunctionReturn(0);
4951   }
4952   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
4953   if (scall == MAT_INITIAL_MATRIX) {
4954     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
4955   }
4956   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
4957   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 /*@
4962      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4963           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4964           with MatGetSize()
4965 
4966     Not Collective
4967 
4968    Input Parameters:
4969 +    A - the matrix
4970 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 
4972    Output Parameter:
4973 .    A_loc - the local sequential matrix generated
4974 
4975     Level: developer
4976 
4977    Notes:
4978      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4979      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4980      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4981      modify the values of the returned A_loc.
4982 
4983 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4984 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its subclasses (type names share the "mpiaij" prefix) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* single rank: the whole matrix is the diagonal block, return it directly */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;  /* diagonal block */
  b = (Mat_SeqAIJ*)(mpimat->B)->data;  /* off-diagonal block (compressed cols) */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all of row i of A (diag) plus row i of B (off-diag) */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* merge each row in global-column order: off-diag cols left of the diagonal
       block (cmap[.] < cstart), then the diagonal block, then the rest of the
       off-diag cols.  aj/aa/bj/ba are advanced cumulatively across rows. */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];  /* map compressed off-diag column to global column */
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;  /* shift local diag column to global */
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refresh the values, in the same merge order */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5085 
5086 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5089 
5090     Not Collective
5091 
5092    Input Parameters:
5093 +    A - the matrix
5094 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5095 
5096    Output Parameters:
5097 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5098 -    A_loc - the local sequential matrix generated
5099 
5100     Level: developer
5101 
5102    Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5104 
5105 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5106 
5107 @*/
5108 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5109 {
5110   Mat            Ao,Ad;
5111   const PetscInt *cmap;
5112   PetscMPIInt    size;
5113   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5114 
5115   PetscFunctionBegin;
5116   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5117   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5118   if (size == 1) {
5119     if (scall == MAT_INITIAL_MATRIX) {
5120       PetscCall(PetscObjectReference((PetscObject)Ad));
5121       *A_loc = Ad;
5122     } else if (scall == MAT_REUSE_MATRIX) {
5123       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5124     }
5125     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5126     PetscFunctionReturn(0);
5127   }
5128   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5129   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5130   if (f) {
5131     PetscCall((*f)(A,scall,glob,A_loc));
5132   } else {
5133     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5134     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5135     Mat_SeqAIJ        *c;
5136     PetscInt          *ai = a->i, *aj = a->j;
5137     PetscInt          *bi = b->i, *bj = b->j;
5138     PetscInt          *ci,*cj;
5139     const PetscScalar *aa,*ba;
5140     PetscScalar       *ca;
5141     PetscInt          i,j,am,dn,on;
5142 
5143     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5144     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5145     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5146     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5147     if (scall == MAT_INITIAL_MATRIX) {
5148       PetscInt k;
5149       PetscCall(PetscMalloc1(1+am,&ci));
5150       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5151       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5152       ci[0] = 0;
5153       for (i=0,k=0; i<am; i++) {
5154         const PetscInt ncols_o = bi[i+1] - bi[i];
5155         const PetscInt ncols_d = ai[i+1] - ai[i];
5156         ci[i+1] = ci[i] + ncols_o + ncols_d;
5157         /* diagonal portion of A */
5158         for (j=0; j<ncols_d; j++,k++) {
5159           cj[k] = *aj++;
5160           ca[k] = *aa++;
5161         }
5162         /* off-diagonal portion of A */
5163         for (j=0; j<ncols_o; j++,k++) {
5164           cj[k] = dn + *bj++;
5165           ca[k] = *ba++;
5166         }
5167       }
5168       /* put together the new matrix */
5169       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5170       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5171       /* Since these are PETSc arrays, change flags to free them as necessary. */
5172       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5173       c->free_a  = PETSC_TRUE;
5174       c->free_ij = PETSC_TRUE;
5175       c->nonew   = 0;
5176       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5177     } else if (scall == MAT_REUSE_MATRIX) {
5178       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5179       for (i=0; i<am; i++) {
5180         const PetscInt ncols_d = ai[i+1] - ai[i];
5181         const PetscInt ncols_o = bi[i+1] - bi[i];
5182         /* diagonal portion of A */
5183         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5184         /* off-diagonal portion of A */
5185         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5186       }
5187       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5188     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5189     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5190     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5191     if (glob) {
5192       PetscInt cst, *gidx;
5193 
5194       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5195       PetscCall(PetscMalloc1(dn+on,&gidx));
5196       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5197       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5198       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5199     }
5200   }
5201   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5202   PetscFunctionReturn(0);
5203 }
5204 
5205 /*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5207 
5208     Not Collective
5209 
5210    Input Parameters:
5211 +    A - the matrix
5212 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5213 -    row, col - index sets of rows and columns to extract (or NULL)
5214 
5215    Output Parameter:
5216 .    A_loc - the local sequential matrix generated
5217 
5218     Level: developer
5219 
5220 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5221 
5222 @*/
5223 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5224 {
5225   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5226   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5227   IS             isrowa,iscola;
5228   Mat            *aloc;
5229   PetscBool      match;
5230 
5231   PetscFunctionBegin;
5232   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
5233   PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5234   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
5235   if (!row) {
5236     start = A->rmap->rstart; end = A->rmap->rend;
5237     PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
5238   } else {
5239     isrowa = *row;
5240   }
5241   if (!col) {
5242     start = A->cmap->rstart;
5243     cmap  = a->garray;
5244     nzA   = a->A->cmap->n;
5245     nzB   = a->B->cmap->n;
5246     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5247     ncols = 0;
5248     for (i=0; i<nzB; i++) {
5249       if (cmap[i] < start) idx[ncols++] = cmap[i];
5250       else break;
5251     }
5252     imark = i;
5253     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5254     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5255     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
5256   } else {
5257     iscola = *col;
5258   }
5259   if (scall != MAT_INITIAL_MATRIX) {
5260     PetscCall(PetscMalloc1(1,&aloc));
5261     aloc[0] = *A_loc;
5262   }
5263   PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
5264   if (!col) { /* attach global id of condensed columns */
5265     PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
5266   }
5267   *A_loc = aloc[0];
5268   PetscCall(PetscFree(aloc));
5269   if (!row) {
5270     PetscCall(ISDestroy(&isrowa));
5271   }
5272   if (!col) {
5273     PetscCall(ISDestroy(&iscola));
5274   }
5275   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
5276   PetscFunctionReturn(0);
5277 }
5278 
5279 /*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
5283  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* For each owned row of P record the pair (diag nnz, off-diag nnz) and a running
   * offset into the diag/off-diag value arrays; these are broadcast to the leaves below */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Total nonzeros to receive for the diag (dntotalcols) and off-diag (ontotalcols)
   * parts, plus the per-row nnz (pnnz) used to preallocate P_oth */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build two SFs whose leaves are the entries of P_oth: one pulls from the diag
   * part of P, the other from the off-diag part */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE(review): pd->j and po->j are mutated in place here and restored below,
   * so P must not be read concurrently while this routine runs -- confirm */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Map po->j back from global to local indices; every entry must be found */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5452 
5453 /*
 * Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices of the local part of A.
 * This supports MPIAIJ and MAIJ matrices.
5456  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; keys are off-diag column indices
     * of A divided by dof (so for MAIJ, dof consecutive columns share one row of P) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        /* First time this key is seen: it gets the next compressed index */
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same key as the previous step (relies on garray being sorted) */
        mapping[i] = count-1;
      }
    }
    /* map: off-diag column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* The sorted unique keys are the (global) rows of P to extract */
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5530 
5531 /*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices of the local part of A
5533 
5534   Collective on Mat
5535 
5536   Input Parameters:
5537 + A - the first matrix in mpiaij format
5538 . B - the second matrix in mpiaij format
5539 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5540 
5541   Output Parameters:
5542 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5543 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5544 - B_seq - the sequential matrix generated
5545 
5546   Level: developer
5547 
5548 @*/
5549 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5550 {
5551   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5552   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5553   IS             isrowb,iscolb;
5554   Mat            *bseq=NULL;
5555 
5556   PetscFunctionBegin;
5557   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5558     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5559   }
5560   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5561 
5562   if (scall == MAT_INITIAL_MATRIX) {
5563     start = A->cmap->rstart;
5564     cmap  = a->garray;
5565     nzA   = a->A->cmap->n;
5566     nzB   = a->B->cmap->n;
5567     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5568     ncols = 0;
5569     for (i=0; i<nzB; i++) {  /* row < local row index */
5570       if (cmap[i] < start) idx[ncols++] = cmap[i];
5571       else break;
5572     }
5573     imark = i;
5574     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5575     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5576     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5577     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5578   } else {
5579     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5580     isrowb  = *rowb; iscolb = *colb;
5581     PetscCall(PetscMalloc1(1,&bseq));
5582     bseq[0] = *B_seq;
5583   }
5584   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5585   *B_seq = bseq[0];
5586   PetscCall(PetscFree(bseq));
5587   if (!rowb) {
5588     PetscCall(ISDestroy(&isrowb));
5589   } else {
5590     *rowb = isrowb;
5591   }
5592   if (!colb) {
5593     PetscCall(ISDestroy(&iscolb));
5594   } else {
5595     *colb = iscolb;
5596   }
5597   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices
    of the OFF-DIAGONAL portion of the local part of A
5604 
5605     Collective on Mat
5606 
5607    Input Parameters:
5608 +    A,B - the matrices in mpiaij format
5609 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5610 
5611    Output Parameter:
5612 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5613 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5614 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5615 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5616 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5619 
5620     Level: developer
5621 
5622 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;
  const PetscInt         *srow,*rstarts,*sstarts;
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* The local column layout of A must match the local row layout of B */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* NOTE(review): these assign NULL to the local parameter copies, not to the caller's
     * variables (*startsj_s etc.); presumably intentional since we return immediately,
     * but confirm callers do not expect outputs here */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* Reuse the communication pattern of A's matrix-vector scatter */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  /* One request array: receives first, then sends */
  rwaits = reqs;
  swaits = reqs + nrecvs;

  /* Without the caller-provided reuse buffers we must rebuild everything */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      /* NOTE(review): uses rstarts[i]*rbs here but (rstarts[i]-rstarts[0])*rbs below;
       * inconsistent if rstarts[0] != 0 (see ATTENTION comment) -- confirm */
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    /* Send each destination the lengths of the rows of B it needs */
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        /* PetscIntSumError guards against PetscInt overflow of the running total */
        PetscCall(PetscIntSumError(rowlen[j],len,&len));
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Reuse the sparsity pattern and buffers saved on the initial call */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj,rstartsj));
      /* NOTE(review): this frees the parameter pointer bufa_ptr (a no-op when NULL),
       * not the bufa buffer allocated above; looks like bufa leaks here -- confirm
       * whether this should be PetscFree(bufa) */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* Save the offsets and send buffer so a later MAT_REUSE_MATRIX call can skip setup */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}
5830 
5831 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5832 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5833 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5834 #if defined(PETSC_HAVE_MKL_SPARSE)
5835 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5836 #endif
5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5838 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5839 #if defined(PETSC_HAVE_ELEMENTAL)
5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5841 #endif
5842 #if defined(PETSC_HAVE_SCALAPACK)
5843 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5844 #endif
5845 #if defined(PETSC_HAVE_HYPRE)
5846 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5847 #endif
5848 #if defined(PETSC_HAVE_CUDA)
5849 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5850 #endif
5851 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5852 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5853 #endif
5854 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5855 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5856 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5857 
5858 /*
5859     Computes (B'*A')' since computing B*A directly is untenable
5860 
5861                n                       p                          p
5862         [             ]       [             ]         [                 ]
5863       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5864         [             ]       [             ]         [                 ]
5865 
5866 */
5867 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5868 {
5869   Mat            At,Bt,Ct;
5870 
5871   PetscFunctionBegin;
5872   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
5873   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
5874   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
5875   PetscCall(MatDestroy(&At));
5876   PetscCall(MatDestroy(&Bt));
5877   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
5878   PetscCall(MatDestroy(&Ct));
5879   PetscFunctionReturn(0);
5880 }
5881 
5882 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5883 {
5884   PetscBool      cisdense;
5885 
5886   PetscFunctionBegin;
5887   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5888   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
5889   PetscCall(MatSetBlockSizesFromMats(C,A,B));
5890   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
5891   if (!cisdense) {
5892     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
5893   }
5894   PetscCall(MatSetUp(C));
5895 
5896   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5897   PetscFunctionReturn(0);
5898 }
5899 
5900 /* ----------------------------------------------------------------*/
5901 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5902 {
5903   Mat_Product *product = C->product;
5904   Mat         A = product->A,B=product->B;
5905 
5906   PetscFunctionBegin;
5907   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5908     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5909 
5910   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5911   C->ops->productsymbolic = MatProductSymbolic_AB;
5912   PetscFunctionReturn(0);
5913 }
5914 
5915 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5916 {
5917   Mat_Product    *product = C->product;
5918 
5919   PetscFunctionBegin;
5920   if (product->type == MATPRODUCT_AB) {
5921     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
5922   }
5923   PetscFunctionReturn(0);
5924 }
5925 
5926 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5927    is greater than value, or last if there is no such element.
5928 */
5929 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5930 {
5931   PetscCount  it,step,count = last - first;
5932 
5933   PetscFunctionBegin;
5934   while (count > 0) {
5935     it   = first;
5936     step = count / 2;
5937     it  += step;
5938     if (!(value < array[it])) {
5939       first  = ++it;
5940       count -= step + 1;
5941     } else count = step;
5942   }
5943   *upper = first;
5944   PetscFunctionReturn(0);
5945 }
5946 
5947 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
5948 
5949   Input Parameters:
5950 
5951     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5952     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5953 
5954     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
5955 
5956     For Set1, j1[] contains column indices of the nonzeros.
5957     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5959     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
5960 
5961     Similar for Set2.
5962 
5963     This routine merges the two sets of nonzeros row by row and removes repeats.
5964 
5965   Output Parameters: (memories are allocated by the caller)
5966 
5967     i[],j[]: the CSR of the merged matrix, which has m rows.
5968     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5969     imap2[]: similar to imap1[], but for Set2.
5970     Note we order nonzeros row-by-row and from left to right.
5971 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* r: row index of mat; m: number of local rows */
  PetscCount     t,t1,t2,b1,e1,b2,e2; /* t,t1,t2: unique-nonzero counters; [b1,e1)/[b2,e2): current row's span in j1[]/j2[] */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Classic two-pointer merge of the (sorted, possibly repeated) column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Next merged nonzero comes from Set1 only */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else {                      /* Next merged nonzero comes from Set2 only */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: row r of the merged matrix has nonzeros [i[r],i[r+1]) in j[] */
  }
  PetscFunctionReturn(0);
}
6025 
6026 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6027 
6028   Input Parameters:
6029     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6030     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6031       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6032 
6033       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6034       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6035 
6036   Output Parameters:
6037     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6038     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6039       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6040       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6041 
6042     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6043       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6044         repeats (i.e., same 'i,j' pair).
6045       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6046         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6047 
6048       Atot: number of entries belonging to the diagonal block
6049       Annz: number of unique nonzeros belonging to the diagonal block.
6050 
6051     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6052 
6053     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6054 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the bound below admits j[p] == mat->cmap->N, but valid global columns are 0..N-1 — confirm intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    /* Sort this row's columns along with perm[]; shifted (negative) diag columns sort before offdiag columns */
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* p-q repeats of the Annz-th unique diag nonzero */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); /* p-q repeats of the Bnnz-th unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6151 
6152 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6153 {
6154   MPI_Comm                  comm;
6155   PetscMPIInt               rank,size;
6156   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6157   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6158   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6159 
6160   PetscFunctionBegin;
6161   PetscCall(PetscFree(mpiaij->garray));
6162   PetscCall(VecDestroy(&mpiaij->lvec));
6163 #if defined(PETSC_USE_CTABLE)
6164   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6165 #else
6166   PetscCall(PetscFree(mpiaij->colmap));
6167 #endif
6168   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6169   mat->assembled = PETSC_FALSE;
6170   mat->was_assembled = PETSC_FALSE;
6171   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6172 
6173   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6174   PetscCallMPI(MPI_Comm_size(comm,&size));
6175   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6176   PetscCall(PetscLayoutSetUp(mat->rmap));
6177   PetscCall(PetscLayoutSetUp(mat->cmap));
6178   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6179   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6180   PetscCall(MatGetLocalSize(mat,&m,&n));
6181   PetscCall(MatGetSize(mat,&M,&N));
6182 
6183   /* ---------------------------------------------------------------------------*/
6184   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6185   /* entries come first, then local rows, then remote rows.                     */
6186   /* ---------------------------------------------------------------------------*/
6187   PetscCount n1 = coo_n,*perm1;
6188   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6189   PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1));
6190   PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */
6191   PetscCall(PetscArraycpy(j1,coo_j,n1));
6192   for (k=0; k<n1; k++) perm1[k] = k;
6193 
6194   /* Manipulate indices so that entries with negative row or col indices will have smallest
6195      row indices, local entries will have greater but negative row indices, and remote entries
6196      will have positive row indices.
6197   */
6198   for (k=0; k<n1; k++) {
6199     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6200     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6201     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6202     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6203   }
6204 
6205   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6206   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6207   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6208   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6209   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6210 
6211   /* ---------------------------------------------------------------------------*/
6212   /*           Split local rows into diag/offdiag portions                      */
6213   /* ---------------------------------------------------------------------------*/
6214   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6215   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6216   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6217 
6218   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6219   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6220   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6221 
6222   /* ---------------------------------------------------------------------------*/
6223   /*           Send remote rows to their owner                                  */
6224   /* ---------------------------------------------------------------------------*/
6225   /* Find which rows should be sent to which remote ranks*/
6226   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6227   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6228   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6229   const PetscInt *ranges;
6230   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6231 
6232   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6233   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6234   for (k=rem; k<n1;) {
6235     PetscMPIInt  owner;
6236     PetscInt     firstRow,lastRow;
6237 
6238     /* Locate a row range */
6239     firstRow = i1[k]; /* first row of this owner */
6240     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6241     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6242 
6243     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6244     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6245 
6246     /* All entries in [k,p) belong to this remote owner */
6247     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6248       PetscMPIInt *sendto2;
6249       PetscInt    *nentries2;
6250       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6251 
6252       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6253       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6254       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6255       PetscCall(PetscFree2(sendto,nentries2));
6256       sendto      = sendto2;
6257       nentries    = nentries2;
6258       maxNsend    = maxNsend2;
6259     }
6260     sendto[nsend]   = owner;
6261     nentries[nsend] = p - k;
6262     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6263     nsend++;
6264     k = p;
6265   }
6266 
6267   /* Build 1st SF to know offsets on remote to send data */
6268   PetscSF     sf1;
6269   PetscInt    nroots = 1,nroots2 = 0;
6270   PetscInt    nleaves = nsend,nleaves2 = 0;
6271   PetscInt    *offsets;
6272   PetscSFNode *iremote;
6273 
6274   PetscCall(PetscSFCreate(comm,&sf1));
6275   PetscCall(PetscMalloc1(nsend,&iremote));
6276   PetscCall(PetscMalloc1(nsend,&offsets));
6277   for (k=0; k<nsend; k++) {
6278     iremote[k].rank  = sendto[k];
6279     iremote[k].index = 0;
6280     nleaves2        += nentries[k];
6281     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6282   }
6283   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6284   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6285   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6286   PetscCall(PetscSFDestroy(&sf1));
6287   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6288 
6289   /* Build 2nd SF to send remote COOs to their owner */
6290   PetscSF sf2;
6291   nroots  = nroots2;
6292   nleaves = nleaves2;
6293   PetscCall(PetscSFCreate(comm,&sf2));
6294   PetscCall(PetscSFSetFromOptions(sf2));
6295   PetscCall(PetscMalloc1(nleaves,&iremote));
6296   p       = 0;
6297   for (k=0; k<nsend; k++) {
6298     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6299     for (q=0; q<nentries[k]; q++,p++) {
6300       iremote[p].rank  = sendto[k];
6301       iremote[p].index = offsets[k] + q;
6302     }
6303   }
6304   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6305 
6306   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6307   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6308 
6309   /* Send the remote COOs to their owner */
6310   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6311   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6312   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6313   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6314   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6315   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6316   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6317 
6318   PetscCall(PetscFree(offsets));
6319   PetscCall(PetscFree2(sendto,nentries));
6320 
6321   /* ---------------------------------------------------------------*/
6322   /* Sort received COOs by row along with the permutation array     */
6323   /* ---------------------------------------------------------------*/
6324   for (k=0; k<n2; k++) perm2[k] = k;
6325   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6326 
6327   /* ---------------------------------------------------------------*/
6328   /* Split received COOs into diag/offdiag portions                 */
6329   /* ---------------------------------------------------------------*/
6330   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6331   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6332   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6333 
6334   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6335   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6336 
6337   /* --------------------------------------------------------------------------*/
6338   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6339   /* --------------------------------------------------------------------------*/
6340   PetscInt   *Ai,*Bi;
6341   PetscInt   *Aj,*Bj;
6342 
6343   PetscCall(PetscMalloc1(m+1,&Ai));
6344   PetscCall(PetscMalloc1(m+1,&Bi));
6345   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6346   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6347 
6348   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6349   PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2));
6350 
6351   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6352   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6353   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6354   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6355   PetscCall(PetscFree3(i1,j1,perm1));
6356   PetscCall(PetscFree3(i2,j2,perm2));
6357 
6358   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6359   PetscInt Annz = Ai[m];
6360   PetscInt Bnnz = Bi[m];
6361   if (Annz < Annz1 + Annz2) {
6362     PetscInt *Aj_new;
6363     PetscCall(PetscMalloc1(Annz,&Aj_new));
6364     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6365     PetscCall(PetscFree(Aj));
6366     Aj   = Aj_new;
6367   }
6368 
6369   if (Bnnz < Bnnz1 + Bnnz2) {
6370     PetscInt *Bj_new;
6371     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6372     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6373     PetscCall(PetscFree(Bj));
6374     Bj   = Bj_new;
6375   }
6376 
6377   /* --------------------------------------------------------------------------------*/
6378   /* Create new submatrices for on-process and off-process coupling                  */
6379   /* --------------------------------------------------------------------------------*/
6380   PetscScalar   *Aa,*Ba;
6381   MatType       rtype;
6382   Mat_SeqAIJ    *a,*b;
6383   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6384   PetscCall(PetscCalloc1(Bnnz,&Ba));
6385   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6386   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6387   PetscCall(MatDestroy(&mpiaij->A));
6388   PetscCall(MatDestroy(&mpiaij->B));
6389   PetscCall(MatGetRootType_Private(mat,&rtype));
6390   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6391   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6392   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6393 
6394   a = (Mat_SeqAIJ*)mpiaij->A->data;
6395   b = (Mat_SeqAIJ*)mpiaij->B->data;
6396   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6397   a->free_a       = b->free_a       = PETSC_TRUE;
6398   a->free_ij      = b->free_ij      = PETSC_TRUE;
6399 
6400   /* conversion must happen AFTER multiply setup */
6401   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6402   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6403   PetscCall(VecDestroy(&mpiaij->lvec));
6404   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6405   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6406 
6407   mpiaij->coo_n   = coo_n;
6408   mpiaij->coo_sf  = sf2;
6409   mpiaij->sendlen = nleaves;
6410   mpiaij->recvlen = nroots;
6411 
6412   mpiaij->Annz1   = Annz1;
6413   mpiaij->Annz2   = Annz2;
6414   mpiaij->Bnnz1   = Bnnz1;
6415   mpiaij->Bnnz2   = Bnnz2;
6416 
6417   mpiaij->Atot1   = Atot1;
6418   mpiaij->Atot2   = Atot2;
6419   mpiaij->Btot1   = Btot1;
6420   mpiaij->Btot2   = Btot2;
6421 
6422   mpiaij->Aimap1  = Aimap1;
6423   mpiaij->Aimap2  = Aimap2;
6424   mpiaij->Bimap1  = Bimap1;
6425   mpiaij->Bimap2  = Bimap2;
6426 
6427   mpiaij->Ajmap1  = Ajmap1;
6428   mpiaij->Ajmap2  = Ajmap2;
6429   mpiaij->Bjmap1  = Bjmap1;
6430   mpiaij->Bjmap2  = Bjmap2;
6431 
6432   mpiaij->Aperm1  = Aperm1;
6433   mpiaij->Aperm2  = Aperm2;
6434   mpiaij->Bperm1  = Bperm1;
6435   mpiaij->Bperm2  = Bperm2;
6436 
6437   mpiaij->Cperm1  = Cperm1;
6438 
6439   /* Allocate in preallocation. If not used, it has zero cost on host */
6440   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6441   PetscFunctionReturn(0);
6442 }
6443 
/* MatSetValuesCOO_MPIAIJ - insert/add values at the COO locations recorded by MatSetPreallocationCOO_MPIAIJ()

   v[] is indexed in the user's original COO order; the perm/imap/jmap arrays built at preallocation
   time map each input value to its slot in the diag (A) and offdiag (B) blocks. Communication of
   remote entries is overlapped with local accumulation.
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B;
  PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));
  if (imode == INSERT_VALUES) {
    /* INSERT semantics are implemented as "zero then accumulate"; repeats in the COO list still sum */
    PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar)));
    PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar)));
  }

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz1; i++) { /* Ajmap1[i+1]-Ajmap1[i] repeats of the i-th unique diag nonzero */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
  }
  for (PetscCount i=0; i<Bnnz1; i++) {
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6490 
6491 /* ----------------------------------------------------------------*/
6492 
6493 /*MC
6494    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6495 
6496    Options Database Keys:
6497 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6498 
6499    Level: beginner
6500 
6501    Notes:
6502     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6503     in this case the values associated with the rows and columns one passes in are set to zero
6504     in the matrix
6505 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6508 
6509 .seealso: MatCreateAIJ()
6510 M*/
6511 
/*
  MatCreate_MPIAIJ - the MATMPIAIJ constructor, called by MatSetType()

  Allocates the Mat_MPIAIJ implementation data, installs the MPIAIJ function table,
  creates the stash used to buffer off-process entries set with MatSetValues(), and
  composes the type-specific functions (preallocation, conversions, products, COO
  assembly, ...) that the generic Mat interfaces dispatch to by name.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose type-specific implementations looked up by the generic Mat interfaces */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to other matrix types; device/third-party back ends are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface, see MatSetPreallocationCOO_MPIAIJ()/MatSetValuesCOO_MPIAIJ() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6591 
6592 /*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.
6595 
6596    Collective
6597 
6598    Input Parameters:
6599 +  comm - MPI communicator
6600 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6606 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6607 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6608 .   a - matrix values
6609 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6610 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6611 -   oa - matrix values
6612 
6613    Output Parameter:
6614 .   mat - the matrix
6615 
6616    Level: advanced
6617 
6618    Notes:
6619        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6620        must free the arrays once the matrix has been destroyed and not before.
6621 
6622        The i and j indices are 0 based
6623 
6624        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6625 
6626        This sets local rows and cannot be used to set off-processor values.
6627 
6628        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6629        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6630        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6631        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6632        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6633        communication if it is known that only local entries will be set.
6634 
6635 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6636           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6637 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* the local row count must be known; the CSR row pointers must be 0-based */
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the SeqAIJ blocks below are built directly from the user's arrays, so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap (do not copy, see the man page Notes) the user arrays as the diagonal (A) and off-diagonal (B) blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* all data is local by construction, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
6666 
/* Context for "backend" MPIAIJ matrix products (AB, AtB, PtAP): the parallel product is
   computed as a series of local intermediate products whose values are then assembled
   into the result matrix through its COO insertion interface */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? (temporaries feed later products, not the result) */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* for AB: merge the diag and off-diag blocks of product->B into one local matrix */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6697 
/* Destroy callback for the MatMatMPIAIJBACKEND context stored in C->product->data */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memory type mmdata->mtype */
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf)); /* must follow the PetscSFFree() calls above */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCall(MatDestroy(&mmdata->mp[i]));
  }
  PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
  /* own[0]/off[0] are the base pointers of the index storage the other own[i]/off[i] point into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}
6722 
6723 /* Copy selected n entries with indices in idx[] of A to v[].
6724    If idx is NULL, copy the whole data array of A to v[]
6725  */
6726 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6727 {
6728   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6729 
6730   PetscFunctionBegin;
6731   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6732   if (f) {
6733     PetscCall((*f)(A,n,idx,v));
6734   } else {
6735     const PetscScalar *vv;
6736 
6737     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6738     if (n && idx) {
6739       PetscScalar    *w = v;
6740       const PetscInt *oi = idx;
6741       PetscInt       j;
6742 
6743       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6744     } else {
6745       PetscCall(PetscArraycpy(v,vv,n));
6746     }
6747     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6748   }
6749   PetscFunctionReturn(0);
6750 }
6751 
/*
  MatProductNumeric_MPIAIJBACKEND - numeric phase of a backend MPIAIJ matrix product

  Re-runs the numeric phase of each intermediate product created during the symbolic
  phase, copies their values into the COO buffers (coo_v for on-process entries, coo_w
  for entries owned by other ranks), gathers off-process values onto their owners, and
  finally assembles C with MatSetValuesCOO().
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase values are reused at most once; later calls must update the temporaries */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* number of off-process entries contributed by mp[i] */

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* number of on-process entries contributed by mp[i] */

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* all entries of mp[i] are on-process: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* values received for locally owned entries are appended after the n_d locally computed ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6800 
6801 /* Support for Pt * A, A * P, or Pt * A * P */
6802 #define MAX_NUMBER_INTERMEDIATE 4
6803 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6804 {
6805   Mat_Product            *product = C->product;
6806   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6807   Mat_MPIAIJ             *a,*p;
6808   MatMatMPIAIJBACKEND    *mmdata;
6809   ISLocalToGlobalMapping P_oth_l2g = NULL;
6810   IS                     glob = NULL;
6811   const char             *prefix;
6812   char                   pprefix[256];
6813   const PetscInt         *globidx,*P_oth_idx;
6814   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6815   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6816   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6817                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6818                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6819   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6820 
6821   MatProductType         ptype;
6822   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6823   PetscMPIInt            size;
6824 
6825   PetscFunctionBegin;
6826   MatCheckProduct(C,1);
6827   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6828   ptype = product->type;
6829   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6830     ptype = MATPRODUCT_AB;
6831     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6832   }
6833   switch (ptype) {
6834   case MATPRODUCT_AB:
6835     A = product->A;
6836     P = product->B;
6837     m = A->rmap->n;
6838     n = P->cmap->n;
6839     M = A->rmap->N;
6840     N = P->cmap->N;
6841     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6842     break;
6843   case MATPRODUCT_AtB:
6844     P = product->A;
6845     A = product->B;
6846     m = P->cmap->n;
6847     n = A->cmap->n;
6848     M = P->cmap->N;
6849     N = A->cmap->N;
6850     hasoffproc = PETSC_TRUE;
6851     break;
6852   case MATPRODUCT_PtAP:
6853     A = product->A;
6854     P = product->B;
6855     m = P->cmap->n;
6856     n = P->cmap->n;
6857     M = P->cmap->N;
6858     N = P->cmap->N;
6859     hasoffproc = PETSC_TRUE;
6860     break;
6861   default:
6862     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6863   }
6864   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
6865   if (size == 1) hasoffproc = PETSC_FALSE;
6866 
6867   /* defaults */
6868   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6869     mp[i]    = NULL;
6870     mptmp[i] = PETSC_FALSE;
6871     rmapt[i] = -1;
6872     cmapt[i] = -1;
6873     rmapa[i] = NULL;
6874     cmapa[i] = NULL;
6875   }
6876 
6877   /* customization */
6878   PetscCall(PetscNew(&mmdata));
6879   mmdata->reusesym = product->api_user;
6880   if (ptype == MATPRODUCT_AB) {
6881     if (product->api_user) {
6882       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6883       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6884       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6885       PetscOptionsEnd();
6886     } else {
6887       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6888       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
6889       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6890       PetscOptionsEnd();
6891     }
6892   } else if (ptype == MATPRODUCT_PtAP) {
6893     if (product->api_user) {
6894       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
6895       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6896       PetscOptionsEnd();
6897     } else {
6898       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
6899       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
6900       PetscOptionsEnd();
6901     }
6902   }
6903   a = (Mat_MPIAIJ*)A->data;
6904   p = (Mat_MPIAIJ*)P->data;
6905   PetscCall(MatSetSizes(C,m,n,M,N));
6906   PetscCall(PetscLayoutSetUp(C->rmap));
6907   PetscCall(PetscLayoutSetUp(C->cmap));
6908   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6909   PetscCall(MatGetOptionsPrefix(C,&prefix));
6910 
6911   cp   = 0;
6912   switch (ptype) {
6913   case MATPRODUCT_AB: /* A * P */
6914     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
6915 
6916     /* A_diag * P_local (merged or not) */
6917     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6918       /* P is product->B */
6919       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6920       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
6921       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6922       PetscCall(MatProductSetFill(mp[cp],product->fill));
6923       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6924       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6925       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6926       mp[cp]->product->api_user = product->api_user;
6927       PetscCall(MatProductSetFromOptions(mp[cp]));
6928       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6929       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6930       PetscCall(ISGetIndices(glob,&globidx));
6931       rmapt[cp] = 1;
6932       cmapt[cp] = 2;
6933       cmapa[cp] = globidx;
6934       mptmp[cp] = PETSC_FALSE;
6935       cp++;
6936     } else { /* A_diag * P_diag and A_diag * P_off */
6937       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
6938       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6939       PetscCall(MatProductSetFill(mp[cp],product->fill));
6940       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6941       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6942       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6943       mp[cp]->product->api_user = product->api_user;
6944       PetscCall(MatProductSetFromOptions(mp[cp]));
6945       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6946       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6947       rmapt[cp] = 1;
6948       cmapt[cp] = 1;
6949       mptmp[cp] = PETSC_FALSE;
6950       cp++;
6951       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
6952       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6953       PetscCall(MatProductSetFill(mp[cp],product->fill));
6954       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6955       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6956       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6957       mp[cp]->product->api_user = product->api_user;
6958       PetscCall(MatProductSetFromOptions(mp[cp]));
6959       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6960       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6961       rmapt[cp] = 1;
6962       cmapt[cp] = 2;
6963       cmapa[cp] = p->garray;
6964       mptmp[cp] = PETSC_FALSE;
6965       cp++;
6966     }
6967 
6968     /* A_off * P_other */
6969     if (mmdata->P_oth) {
6970       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
6971       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
6972       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
6973       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
6974       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
6975       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
6976       PetscCall(MatProductSetFill(mp[cp],product->fill));
6977       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
6978       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
6979       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
6980       mp[cp]->product->api_user = product->api_user;
6981       PetscCall(MatProductSetFromOptions(mp[cp]));
6982       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6983       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
6984       rmapt[cp] = 1;
6985       cmapt[cp] = 2;
6986       cmapa[cp] = P_oth_idx;
6987       mptmp[cp] = PETSC_FALSE;
6988       cp++;
6989     }
6990     break;
6991 
6992   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6993     /* A is product->B */
6994     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
6995     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6996       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
6997       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
6998       PetscCall(MatProductSetFill(mp[cp],product->fill));
6999       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7000       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7001       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7002       mp[cp]->product->api_user = product->api_user;
7003       PetscCall(MatProductSetFromOptions(mp[cp]));
7004       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7005       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7006       PetscCall(ISGetIndices(glob,&globidx));
7007       rmapt[cp] = 2;
7008       rmapa[cp] = globidx;
7009       cmapt[cp] = 2;
7010       cmapa[cp] = globidx;
7011       mptmp[cp] = PETSC_FALSE;
7012       cp++;
7013     } else {
7014       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7015       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7016       PetscCall(MatProductSetFill(mp[cp],product->fill));
7017       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7018       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7019       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7020       mp[cp]->product->api_user = product->api_user;
7021       PetscCall(MatProductSetFromOptions(mp[cp]));
7022       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7023       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7024       PetscCall(ISGetIndices(glob,&globidx));
7025       rmapt[cp] = 1;
7026       cmapt[cp] = 2;
7027       cmapa[cp] = globidx;
7028       mptmp[cp] = PETSC_FALSE;
7029       cp++;
7030       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7031       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7032       PetscCall(MatProductSetFill(mp[cp],product->fill));
7033       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7034       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7035       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7036       mp[cp]->product->api_user = product->api_user;
7037       PetscCall(MatProductSetFromOptions(mp[cp]));
7038       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7039       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7040       rmapt[cp] = 2;
7041       rmapa[cp] = p->garray;
7042       cmapt[cp] = 2;
7043       cmapa[cp] = globidx;
7044       mptmp[cp] = PETSC_FALSE;
7045       cp++;
7046     }
7047     break;
7048   case MATPRODUCT_PtAP:
7049     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7050     /* P is product->B */
7051     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7052     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7053     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7054     PetscCall(MatProductSetFill(mp[cp],product->fill));
7055     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7056     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7057     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7058     mp[cp]->product->api_user = product->api_user;
7059     PetscCall(MatProductSetFromOptions(mp[cp]));
7060     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7061     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7062     PetscCall(ISGetIndices(glob,&globidx));
7063     rmapt[cp] = 2;
7064     rmapa[cp] = globidx;
7065     cmapt[cp] = 2;
7066     cmapa[cp] = globidx;
7067     mptmp[cp] = PETSC_FALSE;
7068     cp++;
7069     if (mmdata->P_oth) {
7070       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7071       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7072       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7073       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7074       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7075       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7076       PetscCall(MatProductSetFill(mp[cp],product->fill));
7077       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7078       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7079       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7080       mp[cp]->product->api_user = product->api_user;
7081       PetscCall(MatProductSetFromOptions(mp[cp]));
7082       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7083       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7084       mptmp[cp] = PETSC_TRUE;
7085       cp++;
7086       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7087       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7088       PetscCall(MatProductSetFill(mp[cp],product->fill));
7089       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7090       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7091       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7092       mp[cp]->product->api_user = product->api_user;
7093       PetscCall(MatProductSetFromOptions(mp[cp]));
7094       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7095       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7096       rmapt[cp] = 2;
7097       rmapa[cp] = globidx;
7098       cmapt[cp] = 2;
7099       cmapa[cp] = P_oth_idx;
7100       mptmp[cp] = PETSC_FALSE;
7101       cp++;
7102     }
7103     break;
7104   default:
7105     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7106   }
7107   /* sanity check */
7108   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7109 
7110   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7111   for (i = 0; i < cp; i++) {
7112     mmdata->mp[i]    = mp[i];
7113     mmdata->mptmp[i] = mptmp[i];
7114   }
7115   mmdata->cp = cp;
7116   C->product->data       = mmdata;
7117   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7118   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7119 
7120   /* memory type */
7121   mmdata->mtype = PETSC_MEMTYPE_HOST;
7122   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7123   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7124   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7125   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7126 
7127   /* prepare coo coordinates for values insertion */
7128 
7129   /* count total nonzeros of those intermediate seqaij Mats
7130     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7131     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7132     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7133   */
7134   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7135     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7136     if (mptmp[cp]) continue;
7137     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7138       const PetscInt *rmap = rmapa[cp];
7139       const PetscInt mr = mp[cp]->rmap->n;
7140       const PetscInt rs = C->rmap->rstart;
7141       const PetscInt re = C->rmap->rend;
7142       const PetscInt *ii  = mm->i;
7143       for (i = 0; i < mr; i++) {
7144         const PetscInt gr = rmap[i];
7145         const PetscInt nz = ii[i+1] - ii[i];
7146         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7147         else ncoo_oown += nz; /* this row is local */
7148       }
7149     } else ncoo_d += mm->nz;
7150   }
7151 
7152   /*
7153     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7154 
7155     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7156 
7157     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
7158 
7159     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7160     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7161     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7162 
7163     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7164     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
7165   */
7166   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7167   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7168 
7169   /* gather (i,j) of nonzeros inserted by remote procs */
7170   if (hasoffproc) {
7171     PetscSF  msf;
7172     PetscInt ncoo2,*coo_i2,*coo_j2;
7173 
7174     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7175     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7176     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7177 
7178     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7179       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7180       PetscInt   *idxoff = mmdata->off[cp];
7181       PetscInt   *idxown = mmdata->own[cp];
7182       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7183         const PetscInt *rmap = rmapa[cp];
7184         const PetscInt *cmap = cmapa[cp];
7185         const PetscInt *ii  = mm->i;
7186         PetscInt       *coi = coo_i + ncoo_o;
7187         PetscInt       *coj = coo_j + ncoo_o;
7188         const PetscInt mr = mp[cp]->rmap->n;
7189         const PetscInt rs = C->rmap->rstart;
7190         const PetscInt re = C->rmap->rend;
7191         const PetscInt cs = C->cmap->rstart;
7192         for (i = 0; i < mr; i++) {
7193           const PetscInt *jj = mm->j + ii[i];
7194           const PetscInt gr  = rmap[i];
7195           const PetscInt nz  = ii[i+1] - ii[i];
7196           if (gr < rs || gr >= re) { /* this is an offproc row */
7197             for (j = ii[i]; j < ii[i+1]; j++) {
7198               *coi++ = gr;
7199               *idxoff++ = j;
7200             }
7201             if (!cmapt[cp]) { /* already global */
7202               for (j = 0; j < nz; j++) *coj++ = jj[j];
7203             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7204               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7205             } else { /* offdiag */
7206               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7207             }
7208             ncoo_o += nz;
7209           } else { /* this is a local row */
7210             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7211           }
7212         }
7213       }
7214       mmdata->off[cp + 1] = idxoff;
7215       mmdata->own[cp + 1] = idxown;
7216     }
7217 
7218     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7219     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7220     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7221     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7222     ncoo = ncoo_d + ncoo_oown + ncoo2;
7223     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7224     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7225     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7226     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7227     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7228     PetscCall(PetscFree2(coo_i,coo_j));
7229     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7230     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7231     coo_i = coo_i2;
7232     coo_j = coo_j2;
7233   } else { /* no offproc values insertion */
7234     ncoo = ncoo_d;
7235     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7236 
7237     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7238     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7239     PetscCall(PetscSFSetUp(mmdata->sf));
7240   }
7241   mmdata->hasoffproc = hasoffproc;
7242 
7243   /* gather (i,j) of nonzeros inserted locally */
7244   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7245     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7246     PetscInt       *coi = coo_i + ncoo_d;
7247     PetscInt       *coj = coo_j + ncoo_d;
7248     const PetscInt *jj  = mm->j;
7249     const PetscInt *ii  = mm->i;
7250     const PetscInt *cmap = cmapa[cp];
7251     const PetscInt *rmap = rmapa[cp];
7252     const PetscInt mr = mp[cp]->rmap->n;
7253     const PetscInt rs = C->rmap->rstart;
7254     const PetscInt re = C->rmap->rend;
7255     const PetscInt cs = C->cmap->rstart;
7256 
7257     if (mptmp[cp]) continue;
7258     if (rmapt[cp] == 1) { /* consecutive rows */
7259       /* fill coo_i */
7260       for (i = 0; i < mr; i++) {
7261         const PetscInt gr = i + rs;
7262         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7263       }
7264       /* fill coo_j */
7265       if (!cmapt[cp]) { /* type-0, already global */
7266         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7267       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7268         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7269       } else { /* type-2, local to global for sparse columns */
7270         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7271       }
7272       ncoo_d += mm->nz;
7273     } else if (rmapt[cp] == 2) { /* sparse rows */
7274       for (i = 0; i < mr; i++) {
7275         const PetscInt *jj = mm->j + ii[i];
7276         const PetscInt gr  = rmap[i];
7277         const PetscInt nz  = ii[i+1] - ii[i];
7278         if (gr >= rs && gr < re) { /* local rows */
7279           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7280           if (!cmapt[cp]) { /* type-0, already global */
7281             for (j = 0; j < nz; j++) *coj++ = jj[j];
7282           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7283             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7284           } else { /* type-2, local to global for sparse columns */
7285             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7286           }
7287           ncoo_d += nz;
7288         }
7289       }
7290     }
7291   }
7292   if (glob) {
7293     PetscCall(ISRestoreIndices(glob,&globidx));
7294   }
7295   PetscCall(ISDestroy(&glob));
7296   if (P_oth_l2g) {
7297     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7298   }
7299   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7300   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7301   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7302 
7303   /* preallocate with COO data */
7304   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7305   PetscCall(PetscFree2(coo_i,coo_j));
7306   PetscFunctionReturn(0);
7307 }
7308 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - Decide whether the device-capable "backend"
   symbolic product (MatProductSymbolic_MPIAIJBACKEND) should be used for this product.

   Selection logic:
   - Without device support compiled in, the backend path is always eligible (match = PETSC_TRUE).
   - With device support, it is eligible only when neither operand is bound to the CPU and
     A and B have the same matrix type; even then the user may force the CPU path with the
     per-product -..._backend_cpu options queried below.
   - Only MATPRODUCT_AB, MATPRODUCT_AtB and MATPRODUCT_PtAP are handled by the backend;
     anything else (or a CPU request) falls back to MatProductSetFromOptions_MPIAIJ().
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool    match   = PETSC_FALSE; /* must prove both operands are device-compatible */
  PetscBool    usecpu  = PETSC_FALSE;
#else
  PetscBool    match   = PETSC_TRUE;  /* no device: backend path is the plain host path */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend requires A and B to share the same (device) type and neither be pinned to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on which API created the product: the classic user API
       (MatMatMult() etc., api_user true) or the MatProduct API (api_user false) */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* honor a user request to stay on the CPU */
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7381 
7382 /*
7383     Special version for direct calls from Fortran
7384 */
7385 #include <petsc/private/fortranimpl.h>
7386 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: these redefinitions rely on a variable named _ierr being in scope at every call
   site; they are only valid inside matsetvaluesmpiaij_() below and are #undef'ed at the
   end of the file, after which the original definitions cannot be recovered */
#undef  PetscCall
#define PetscCall(...) do {                                                                    \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                                                  \
    }                                                                                          \
  } while (0)

/* Report the error through *_ierr (the Fortran error argument) and return from the void function */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                                            \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                                                    \
  } while (0)
7403 
7404 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7405 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7406 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7407 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7408 #else
7409 #endif
7410 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7411 {
7412   Mat          mat  = *mmat;
7413   PetscInt     m    = *mm, n = *mn;
7414   InsertMode   addv = *maddv;
7415   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7416   PetscScalar  value;
7417 
7418   MatCheckPreallocated(mat,1);
7419   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7420   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7421   {
7422     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7423     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7424     PetscBool roworiented = aij->roworiented;
7425 
7426     /* Some Variables required in the macro */
7427     Mat        A                    = aij->A;
7428     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7429     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7430     MatScalar  *aa;
7431     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7432     Mat        B                    = aij->B;
7433     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7434     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7435     MatScalar  *ba;
7436     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7437      * cannot use "#if defined" inside a macro. */
7438     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7439 
7440     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7441     PetscInt  nonew = a->nonew;
7442     MatScalar *ap1,*ap2;
7443 
7444     PetscFunctionBegin;
7445     PetscCall(MatSeqAIJGetArray(A,&aa));
7446     PetscCall(MatSeqAIJGetArray(B,&ba));
7447     for (i=0; i<m; i++) {
7448       if (im[i] < 0) continue;
7449       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7450       if (im[i] >= rstart && im[i] < rend) {
7451         row      = im[i] - rstart;
7452         lastcol1 = -1;
7453         rp1      = aj + ai[row];
7454         ap1      = aa + ai[row];
7455         rmax1    = aimax[row];
7456         nrow1    = ailen[row];
7457         low1     = 0;
7458         high1    = nrow1;
7459         lastcol2 = -1;
7460         rp2      = bj + bi[row];
7461         ap2      = ba + bi[row];
7462         rmax2    = bimax[row];
7463         nrow2    = bilen[row];
7464         low2     = 0;
7465         high2    = nrow2;
7466 
7467         for (j=0; j<n; j++) {
7468           if (roworiented) value = v[i*n+j];
7469           else value = v[i+j*m];
7470           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7471           if (in[j] >= cstart && in[j] < cend) {
7472             col = in[j] - cstart;
7473             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7474           } else if (in[j] < 0) continue;
7475           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7476             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7477             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7478           } else {
7479             if (mat->was_assembled) {
7480               if (!aij->colmap) {
7481                 PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
7482               }
7483 #if defined(PETSC_USE_CTABLE)
7484               PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
7485               col--;
7486 #else
7487               col = aij->colmap[in[j]] - 1;
7488 #endif
7489               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7490                 PetscCall(MatDisAssemble_MPIAIJ(mat));
7491                 col  =  in[j];
7492                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7493                 B        = aij->B;
7494                 b        = (Mat_SeqAIJ*)B->data;
7495                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7496                 rp2      = bj + bi[row];
7497                 ap2      = ba + bi[row];
7498                 rmax2    = bimax[row];
7499                 nrow2    = bilen[row];
7500                 low2     = 0;
7501                 high2    = nrow2;
7502                 bm       = aij->B->rmap->n;
7503                 ba       = b->a;
7504                 inserted = PETSC_FALSE;
7505               }
7506             } else col = in[j];
7507             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7508           }
7509         }
7510       } else if (!aij->donotstash) {
7511         if (roworiented) {
7512           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7513         } else {
7514           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
7515         }
7516       }
7517     }
7518     PetscCall(MatSeqAIJRestoreArray(A,&aa));
7519     PetscCall(MatSeqAIJRestoreArray(B,&ba));
7520   }
7521   PetscFunctionReturnVoid();
7522 }
7523 /* Undefining these here since they were redefined from their original definition above! No
7524  * other PETSc functions should be defined past this point, as it is impossible to recover the
7525  * original definitions */
7526 #undef PetscCall
7527 #undef SETERRQ
7528