xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2e16c0ce58b3a4ec287cbc0a0807bfb0a0fa5ac9)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
10 {
11   Mat            B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
15   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
16   PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
17   PetscCall(MatDestroy(&B));
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
22 {
23   Mat            B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
27   PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
28   PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
36    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
43 
  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the matrix also
    automatically switches over to using inodes when enough of them exist.
47 
48   Level: beginner
49 
50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
57    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
58    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
64 
65   Level: beginner
66 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
71 {
72   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A,flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B,flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
97     PetscCall(MatSetBlockSizes(mat->B,rbs,1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
103 {
104   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
105   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
106   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
107   const PetscInt  *ia,*ib;
108   const MatScalar *aa,*bb,*aav,*bav;
109   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
110   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
111 
112   PetscFunctionBegin;
113   *keptrows = NULL;
114 
115   ia   = a->i;
116   ib   = b->i;
117   PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
118   PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
119   for (i=0; i<m; i++) {
120     na = ia[i+1] - ia[i];
121     nb = ib[i+1] - ib[i];
122     if (!na && !nb) {
123       cnt++;
124       goto ok1;
125     }
126     aa = aav + ia[i];
127     for (j=0; j<na; j++) {
128       if (aa[j] != 0.0) goto ok1;
129     }
130     bb = bav + ib[i];
131     for (j=0; j <nb; j++) {
132       if (bb[j] != 0.0) goto ok1;
133     }
134     cnt++;
135 ok1:;
136   }
137   PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
138   if (!n0rows) {
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
140     PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
141     PetscFunctionReturn(0);
142   }
143   PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
144   cnt  = 0;
145   for (i=0; i<m; i++) {
146     na = ia[i+1] - ia[i];
147     nb = ib[i+1] - ib[i];
148     if (!na && !nb) continue;
149     aa = aav + ia[i];
150     for (j=0; j<na;j++) {
151       if (aa[j] != 0.0) {
152         rows[cnt++] = rstart + i;
153         goto ok2;
154       }
155     }
156     bb = bav + ib[i];
157     for (j=0; j<nb; j++) {
158       if (bb[j] != 0.0) {
159         rows[cnt++] = rstart + i;
160         goto ok2;
161       }
162     }
163 ok2:;
164   }
165   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
167   PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
172 {
173   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
174   PetscBool         cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y,&cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A,D,is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y,D,is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
189   PetscInt       i,rstart,nrows,*rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
194   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
195   for (i=0; i<nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
197   PetscFunctionReturn(0);
198 }
199 
200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
201 {
202   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
203   PetscInt          i,m,n,*garray = aij->garray;
204   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
205   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
206   PetscReal         *work;
207   const PetscScalar *dummy;
208 
209   PetscFunctionBegin;
210   PetscCall(MatGetSize(A,&m,&n));
211   PetscCall(PetscCalloc1(n,&work));
212   PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
213   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
214   PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
215   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
216   if (type == NORM_2) {
217     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
218       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
219     }
220     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
221       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
222     }
223   } else if (type == NORM_1) {
224     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
225       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
226     }
227     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
228       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
229     }
230   } else if (type == NORM_INFINITY) {
231     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
232       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
233     }
234     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
235       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
236     }
237   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
238     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
239       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
240     }
241     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
242       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
243     }
244   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
245     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
246       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
247     }
248     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
249       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
250     }
251   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
252   if (type == NORM_INFINITY) {
253     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
254   } else {
255     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
256   }
257   PetscCall(PetscFree(work));
258   if (type == NORM_2) {
259     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
260   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
261     for (i=0; i<n; i++) reductions[i] /= m;
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
267 {
268   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
269   IS              sis,gis;
270   const PetscInt  *isis,*igis;
271   PetscInt        n,*iis,nsis,ngis,rstart,i;
272 
273   PetscFunctionBegin;
274   PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
275   PetscCall(MatFindNonzeroRows(a->B,&gis));
276   PetscCall(ISGetSize(gis,&ngis));
277   PetscCall(ISGetSize(sis,&nsis));
278   PetscCall(ISGetIndices(sis,&isis));
279   PetscCall(ISGetIndices(gis,&igis));
280 
281   PetscCall(PetscMalloc1(ngis+nsis,&iis));
282   PetscCall(PetscArraycpy(iis,igis,ngis));
283   PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
284   n    = ngis + nsis;
285   PetscCall(PetscSortRemoveDupsInt(&n,iis));
286   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
287   for (i=0; i<n; i++) iis[i] += rstart;
288   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
289 
290   PetscCall(ISRestoreIndices(sis,&isis));
291   PetscCall(ISRestoreIndices(gis,&igis));
292   PetscCall(ISDestroy(&sis));
293   PetscCall(ISDestroy(&gis));
294   PetscFunctionReturn(0);
295 }
296 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable
  (each process stores an order-N integer array) but access is fast.
*/
/*
  Builds aij->colmap: for each global column present in the off-diagonal block B,
  maps the global column number to its local column number in B plus one.  The
  +1 shift lets a value of 0 (or a failed table lookup) mean "column absent".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable path: hash table keyed by global column + 1 (shifted so 0 can mean "absent") */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* non-scalable path: dense integer array of length = global number of columns on every rank */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
323 
/*
  MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol):
  inserts (INSERT_VALUES) or accumulates (ADD_VALUES) `value` at local
  (row,col) of the diagonal block A.  orow/ocol are the global indices, used
  only for error messages.  Relies on many variables from the enclosing scope
  (rp1, ap1, nrow1, low1, high1, lastcol1, rmax1, aimax, ailen, ai, aj, aa, a,
  A, am, nonew, ignorezeroentries, _i, t, N).  The [low1,high1) window is first
  narrowed by a coarse bisection, then scanned linearly; if the column is not
  found, a new nonzero is inserted (reallocating via MatSeqXAIJReallocateAIJ
  if the row is full) and later entries in the row are shifted up.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether the LogFlops call will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
      PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
360 
/*
  MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol):
  same logic as MatSetValues_SeqAIJ_A_Private, but operating on the
  off-diagonal block B (variables rp2, ap2, nrow2, low2, high2, lastcol2,
  rmax2, bimax, bilen, bi, bj, ba, b, B, bm from the enclosing scope).
  Note: unlike the A variant, the zero-value skip does not test row != col,
  since diagonal entries never land in the off-diagonal block.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
396 
/*
  Replaces the stored values of one locally owned row.  v[] is expected to hold
  the row's values ordered by global column: the off-diagonal entries to the
  left of the diagonal block first, then the diagonal-block entries, then the
  off-diagonal entries to the right.  No new nonzero locations are created.
  NOTE(review): the left/right split uses the row-ownership start as the column
  threshold, which is why (per the original comment) this only works for square
  matrices with congruent row/column layouts — confirm before reusing elsewhere.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row  = row - diag; /* convert the global row index to a local one */
  /* l counts the leading off-diagonal entries whose global column is left of the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
434 
/*
  MatSetValues for MPIAIJ: inserts or adds the m-by-n logically dense block v
  at global rows im[] / columns in[] (row- or column-oriented depending on
  aij->roworiented).  Values in locally owned rows are written directly into
  the diagonal (A) or off-diagonal (B) sequential blocks via the
  MatSetValues_SeqAIJ_{A,B}_Private macros; values in rows owned by other
  ranks are stashed for communication during assembly.  Negative row or column
  indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
      row      = im[i] - rstart;
      /* search-window state for the diagonal block (used by macro A) */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* search-window state for the off-diagonal block (used by macro B) */
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) { /* negative columns are ignored */
          continue;
        } else { /* column falls in the off-diagonal block */
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            /* after assembly B stores compacted local column ids; translate through colmap */
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* the column is new for B but B's nonzero pattern is frozen (nonew set) */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before the first assembly B is addressed with global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else { /* off-process row: stash the values for assembly-time communication */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}
541 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
548 {
549   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
550   Mat            A           = aij->A; /* diagonal part of the matrix */
551   Mat            B           = aij->B; /* offdiagonal part of the matrix */
552   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
553   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
554   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
555   PetscInt       *ailen      = a->ilen,*aj = a->j;
556   PetscInt       *bilen      = b->ilen,*bj = b->j;
557   PetscInt       am          = aij->A->rmap->n,j;
558   PetscInt       diag_so_far = 0,dnz;
559   PetscInt       offd_so_far = 0,onz;
560 
561   PetscFunctionBegin;
562   /* Iterate over all rows of the matrix */
563   for (j=0; j<am; j++) {
564     dnz = onz = 0;
565     /*  Iterate over all non-zero columns of the current row */
566     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
567       /* If column is in the diagonal */
568       if (mat_j[col] >= cstart && mat_j[col] < cend) {
569         aj[diag_so_far++] = mat_j[col] - cstart;
570         dnz++;
571       } else { /* off-diagonal entries */
572         bj[offd_so_far++] = mat_j[col];
573         onz++;
574       }
575     }
576     ailen[j] = dnz;
577     bilen[j] = onz;
578   }
579   PetscFunctionReturn(0);
580 }
581 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
589 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
590 {
591   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
592   Mat            A      = aij->A; /* diagonal part of the matrix */
593   Mat            B      = aij->B; /* offdiagonal part of the matrix */
594   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
595   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
596   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
598   PetscInt       *ailen = a->ilen,*aj = a->j;
599   PetscInt       *bilen = b->ilen,*bj = b->j;
600   PetscInt       am     = aij->A->rmap->n,j;
601   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
602   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
603   PetscScalar    *aa = a->a,*ba = b->a;
604 
605   PetscFunctionBegin;
606   /* Iterate over all rows of the matrix */
607   for (j=0; j<am; j++) {
608     dnz_row = onz_row = 0;
609     rowstart_offd = full_offd_i[j];
610     rowstart_diag = full_diag_i[j];
611     /*  Iterate over all non-zero columns of the current row */
612     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
613       /* If column is in the diagonal */
614       if (mat_j[col] >= cstart && mat_j[col] < cend) {
615         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
616         aa[rowstart_diag+dnz_row] = mat_a[col];
617         dnz_row++;
618       } else { /* off-diagonal entries */
619         bj[rowstart_offd+onz_row] = mat_j[col];
620         ba[rowstart_offd+onz_row] = mat_a[col];
621         onz_row++;
622       }
623     }
624     ailen[j] = dnz_row;
625     bilen[j] = onz_row;
626   }
627   PetscFunctionReturn(0);
628 }
629 
/*
  Retrieves an m-by-n block of values at global rows idxm[] / columns idxn[]
  into v (row-major).  Only locally owned rows may be queried; negative row or
  column indices leave the corresponding entries of v untouched.  Off-diagonal
  columns are translated through aij->colmap; columns not present in the
  off-diagonal block yield 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column is in the diagonal block: query with local indices */
          col  = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* column is in the off-diagonal block: translate global -> local via colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B => the value is structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
669 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
678   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
679   PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
680   PetscFunctionReturn(0);
681 }
682 
/* Finish assembly: receive stashed off-process entries, assemble both blocks,
   handle global disassembly/reassembly, and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* Drain the stash: receive entries sent during MatAssemblyBegin() and insert
     them locally, one contiguous run of same-row entries per MatSetValues() call */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the machinery for matrix-vector products */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached MatGetRow work arrays are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is invalidated by new values */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    /* summing per-rank states yields a global state that changes whenever any rank's pattern changes */
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
763 
764 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
765 {
766   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
767 
768   PetscFunctionBegin;
769   PetscCall(MatZeroEntries(l->A));
770   PetscCall(MatZeroEntries(l->B));
771   PetscFunctionReturn(0);
772 }
773 
/* Zero the given global rows (possibly owned by other ranks), optionally
   placing 'diag' on the diagonal and fixing b so that x remains a solution. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;  /* block nonzero states before zeroing, to detect pattern changes */
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* b[i] = diag*x[i] uses the same local index for row and column */
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the block states so we can tell below whether the pattern changed */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the blocks' "no new nonzeros" flags; restored after the insertions below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries through the generic path, which routes each
       (row,row) entry to whichever block owns that column */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;  /* rectangular case: no diagonal entry for this row */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* zero diagonal requested: simply zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;  /* bump global state only if some rank's pattern changed */
  PetscFunctionReturn(0);
}
847 
/* Zero the given global rows AND columns, optionally placing 'diag' on the
   diagonal and adjusting b using the known solution values in x. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;  /* NOTE: reused as a row length in the loops below */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* after the MPI_LOR reduction, lrows[r] >= 0 exactly when some rank requested local row r */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the ghost columns: nonzero where the global column is zeroed */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring ghost values of x over so b can be corrected in the loops below */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed storage keeps only nonempty rows; ridx maps back to true row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {  /* this ghost column is being zeroed */
          if (b) bb[*ridx] -= *aa*xx[*aj];  /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
966 
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscInt       nt;
971   VecScatter     Mvctx = a->Mvctx;
972 
973   PetscFunctionBegin;
974   PetscCall(VecGetLocalSize(xx,&nt));
975   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
976   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
977   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
978   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
979   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986 
987   PetscFunctionBegin;
988   PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
993 {
994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
995   VecScatter     Mvctx = a->Mvctx;
996 
997   PetscFunctionBegin;
998   PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
999   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1000   PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1001   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1006 {
1007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1008 
1009   PetscFunctionBegin;
1010   /* do nondiagonal part */
1011   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1012   /* do local part */
1013   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1014   /* add partial results together */
1015   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1016   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1017   PetscFunctionReturn(0);
1018 }
1019 
/* Test whether Bmat equals Amat^T (within tol), first via the cheap diagonal
   block comparison and, if that passes, via off-diagonal submatrix extraction. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  /* every rank must pass the local test */
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);  /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range [first,last).
     NOTE(review): the buffer is sized with N but the second loop fills up to M;
     this implicitly assumes a square matrix (M == N) -- confirm callers. */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* compare A(Me,Notme) against B(Notme,Me): they must be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1060 
1061 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1062 {
1063   PetscFunctionBegin;
1064   PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1069 {
1070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /*
1084   This only works correctly for square matrices where the subblock A->A is the
1085    diagonal block
1086 */
1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090 
1091   PetscFunctionBegin;
1092   PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1093   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1094   PetscCall(MatGetDiagonal(a->A,v));
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1099 {
1100   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1101 
1102   PetscFunctionBegin;
1103   PetscCall(MatScale(a->A,aa));
1104   PetscCall(MatScale(a->B,aa));
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1109 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1110 {
1111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(PetscSFDestroy(&aij->coo_sf));
1115   PetscCall(PetscFree(aij->Aperm1));
1116   PetscCall(PetscFree(aij->Bperm1));
1117   PetscCall(PetscFree(aij->Ajmap1));
1118   PetscCall(PetscFree(aij->Bjmap1));
1119 
1120   PetscCall(PetscFree(aij->Aimap2));
1121   PetscCall(PetscFree(aij->Bimap2));
1122   PetscCall(PetscFree(aij->Aperm2));
1123   PetscCall(PetscFree(aij->Bperm2));
1124   PetscCall(PetscFree(aij->Ajmap2));
1125   PetscCall(PetscFree(aij->Bjmap2));
1126 
1127   PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
1128   PetscCall(PetscFree(aij->Cperm1));
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1133 {
1134   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1135 
1136   PetscFunctionBegin;
1137 #if defined(PETSC_USE_LOG)
1138   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1139 #endif
1140   PetscCall(MatStashDestroy_Private(&mat->stash));
1141   PetscCall(VecDestroy(&aij->diag));
1142   PetscCall(MatDestroy(&aij->A));
1143   PetscCall(MatDestroy(&aij->B));
1144 #if defined(PETSC_USE_CTABLE)
1145   PetscCall(PetscTableDestroy(&aij->colmap));
1146 #else
1147   PetscCall(PetscFree(aij->colmap));
1148 #endif
1149   PetscCall(PetscFree(aij->garray));
1150   PetscCall(VecDestroy(&aij->lvec));
1151   PetscCall(VecScatterDestroy(&aij->Mvctx));
1152   PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
1153   PetscCall(PetscFree(aij->ld));
1154 
1155   /* Free COO */
1156   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1157 
1158   PetscCall(PetscFree(mat->data));
1159 
1160   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1161   PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));
1162 
1163   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1166   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
1167   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
1173 #if defined(PETSC_HAVE_CUDA)
1174   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
1175 #endif
1176 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1177   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
1178 #endif
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
1180 #if defined(PETSC_HAVE_ELEMENTAL)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
1182 #endif
1183 #if defined(PETSC_HAVE_SCALAPACK)
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
1185 #endif
1186 #if defined(PETSC_HAVE_HYPRE)
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
1189 #endif
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
1196 #if defined(PETSC_HAVE_MKL_SPARSE)
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
1198 #endif
1199   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
1200   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
1201   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
1202   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
1203   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1208 {
1209   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1210   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1211   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1212   const PetscInt    *garray = aij->garray;
1213   const PetscScalar *aa,*ba;
1214   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1215   PetscInt          *rowlens;
1216   PetscInt          *colidxs;
1217   PetscScalar       *matvals;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(PetscViewerSetUp(viewer));
1221 
1222   M  = mat->rmap->N;
1223   N  = mat->cmap->N;
1224   m  = mat->rmap->n;
1225   rs = mat->rmap->rstart;
1226   cs = mat->cmap->rstart;
1227   nz = A->nz + B->nz;
1228 
1229   /* write matrix header */
1230   header[0] = MAT_FILE_CLASSID;
1231   header[1] = M; header[2] = N; header[3] = nz;
1232   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1233   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1234 
1235   /* fill in and store row lengths  */
1236   PetscCall(PetscMalloc1(m,&rowlens));
1237   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1238   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1239   PetscCall(PetscFree(rowlens));
1240 
1241   /* fill in and store column indices */
1242   PetscCall(PetscMalloc1(nz,&colidxs));
1243   for (cnt=0, i=0; i<m; i++) {
1244     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1245       if (garray[B->j[jb]] > cs) break;
1246       colidxs[cnt++] = garray[B->j[jb]];
1247     }
1248     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1249       colidxs[cnt++] = A->j[ja] + cs;
1250     for (; jb<B->i[i+1]; jb++)
1251       colidxs[cnt++] = garray[B->j[jb]];
1252   }
1253   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1254   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
1255   PetscCall(PetscFree(colidxs));
1256 
1257   /* fill in and store nonzero values */
1258   PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
1259   PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1260   PetscCall(PetscMalloc1(nz,&matvals));
1261   for (cnt=0, i=0; i<m; i++) {
1262     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1263       if (garray[B->j[jb]] > cs) break;
1264       matvals[cnt++] = ba[jb];
1265     }
1266     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1267       matvals[cnt++] = aa[ja];
1268     for (; jb<B->i[i+1]; jb++)
1269       matvals[cnt++] = ba[jb];
1270   }
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
1272   PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
1273   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1274   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
1275   PetscCall(PetscFree(matvals));
1276 
1277   /* write block size option to the viewer's .info file */
1278   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #include <petscdraw.h>
/* Viewer dispatch for ASCII/draw/socket: handles the summary ASCII formats and
   binary directly; all remaining formats gather the matrix onto rank 0. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;  /* integer average */
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* synchronized per-rank summary of local sizes, memory, and I-node usage */
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable -- iascii is fully handled by the first branch above */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row and column; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1410 
1411 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1412 {
1413   PetscBool      iascii,isdraw,issocket,isbinary;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1419   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1420   if (iascii || isdraw || isbinary || issocket) {
1421     PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

  Only "local" sweep variants are supported in parallel: each process relaxes
  its diagonal block A, and the off-diagonal block B contributes by folding
  the ghosted current iterate into a modified right-hand side bb1 = bb - B*x.
  A globally-ordered parallel SOR is not implemented and raises an error.

  Input Parameters:
+ matin  - the MPIAIJ matrix
. bb     - right-hand side
. omega  - relaxation factor
. flag   - MatSORType flags (local forward/backward/symmetric, Eisenstat, ...)
. fshift - diagonal shift
. its    - number of outer (global) iterations
- lits   - number of local iterations per outer iteration

  Output Parameter:
. xx - the iterate (also an input unless SOR_ZERO_INITIAL_GUESS is set)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Vec            bb1 = NULL;   /* modified rhs bb - B*x; allocated only when needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper acts on the diagonal block only; delegate directly */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero (off-diagonal part must enter the rhs), or for Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep can use bb directly since x starts at zero (B*x = 0) */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    /* backward half-sweep with zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal of the full matrix on first use */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx  (Eisenstat's trick) */
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error detected in the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1524 
/*
  MatPermute_MPIAIJ - Forms B = P_r * A * P_c for permutation index sets rowp/colp.

  Strategy: invert the row and column permutations with PetscSF reductions to
  learn where each locally-owned row/column lands, translate the compressed
  off-diagonal (ghost) columns to their permuted global indices, count the
  resulting diagonal/off-diagonal nonzeros for exact preallocation, and then
  insert the values of A into the new matrix with MatSetValues().

  Input Parameters:
+ A    - the MPIAIJ matrix to permute
. rowp - row permutation (each local row's destination global row)
- colp - column permutation

  Output Parameter:
. B - the permuted matrix
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  /* work is sized for both row and column passes; rdest/cdest receive the
     permuted destination of each local row/column */
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols (ghost columns of the off-diagonal block) should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros per row of the permuted matrix */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* counts were computed for source rows; broadcast them to the owners of the
     destination rows (tdnnz/tonnz) for preallocation */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never set in this function, so this destroy is
     dead code here — presumably a remnant of a variant that created a parallel
     column IS; confirm before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1630 
1631 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1632 {
1633   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1634 
1635   PetscFunctionBegin;
1636   PetscCall(MatGetSize(aij->B,NULL,nghosts));
1637   if (ghosts) *ghosts = aij->garray;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1642 {
1643   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1644   Mat            A    = mat->A,B = mat->B;
1645   PetscLogDouble isend[5],irecv[5];
1646 
1647   PetscFunctionBegin;
1648   info->block_size = 1.0;
1649   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1650 
1651   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1652   isend[3] = info->memory;  isend[4] = info->mallocs;
1653 
1654   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1655 
1656   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1657   isend[3] += info->memory;  isend[4] += info->mallocs;
1658   if (flag == MAT_LOCAL) {
1659     info->nz_used      = isend[0];
1660     info->nz_allocated = isend[1];
1661     info->nz_unneeded  = isend[2];
1662     info->memory       = isend[3];
1663     info->mallocs      = isend[4];
1664   } else if (flag == MAT_GLOBAL_MAX) {
1665     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   } else if (flag == MAT_GLOBAL_SUM) {
1673     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1674 
1675     info->nz_used      = irecv[0];
1676     info->nz_allocated = irecv[1];
1677     info->nz_unneeded  = irecv[2];
1678     info->memory       = irecv[3];
1679     info->mallocs      = irecv[4];
1680   }
1681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1682   info->fill_ratio_needed = 0;
1683   info->factor_mallocs    = 0;
1684   PetscFunctionReturn(0);
1685 }
1686 
/*
  MatSetOption_MPIAIJ - Dispatches a MatOption to the appropriate place:
  most options are forwarded to both sequential blocks A and B, a few set
  fields on the parallel object itself, and some are handled entirely by
  the generic MatSetOption() caller and are no-ops here.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply to both the diagonal and off-diagonal block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    /* recorded on the parallel object as well, since MatSetValues_MPIAIJ
       consults it when splitting inserted values between A and B */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing values destined for other processes during assembly */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1740 
/*
  MatGetRow_MPIAIJ - Returns one locally-owned row of the matrix in global
  column numbering.

  The row is assembled by merging the corresponding rows of the diagonal
  block A (columns offset by cstart) and the off-diagonal block B (columns
  translated through garray), keeping global column order.  Values/indices
  are copied into per-matrix scratch arrays (rowvalues/rowindices) that are
  sized once for the longest local row; the caller must pair this with
  MatRestoreRow().

  Input Parameters:
+ matin - the MPIAIJ matrix
- row   - global row number (must be owned by this process)

  Output Parameters:
+ nz  - number of nonzeros in the row
. idx - global column indices (optional, pass NULL to skip)
- v   - values (optional, pass NULL to skip)
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time per matrix */
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes cstart;
         layout is then [B-before | all of A | B-after] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the value pass already located the split point */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1817 
1818 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1819 {
1820   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1821 
1822   PetscFunctionBegin;
1823   PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1824   aij->getrowactive = PETSC_FALSE;
1825   PetscFunctionReturn(0);
1826 }
1827 
/*
  MatNorm_MPIAIJ - Computes a matrix norm (Frobenius, 1-norm, or infinity norm).

  On a single process the computation is delegated to the sequential block.
  Otherwise each process accumulates over its diagonal (A) and off-diagonal (B)
  blocks and the results are combined with an Allreduce:
    - Frobenius: sum of squared moduli, reduced with SUM, then sqrt;
    - 1-norm: per-global-column absolute sums, reduced with SUM, then max;
    - infinity norm: per-local-row absolute sums, reduced with MAX.
  The 2-norm is not supported for sparse parallel matrices.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all stored entries of both blocks */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp holds this process's contribution to every global column sum */
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* row j's absolute sum = A-part + B-part */
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}
1897 
/*
  MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place, when *matout == A) the result is
  preallocated exactly by counting column occurrences of the local blocks and
  reducing the off-diagonal contributions to their owning processes with a
  PetscSF.  The diagonal block is transposed locally (no communication); the
  off-diagonal block is scattered with MatSetValues() by inserting each of its
  rows as a column of the result.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* transpose has swapped sizes: local rows = A's local columns, etc. */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B    = *matout;
    /* reusing a matrix: its pattern must already hold the transpose */
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global numbering */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* row i of B becomes (part of) column 'row' of the transpose */
    ncol = bi[i+1]-bi[i];
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B and destroy the shell */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1985 
1986 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1987 {
1988   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1989   Mat            a    = aij->A,b = aij->B;
1990   PetscInt       s1,s2,s3;
1991 
1992   PetscFunctionBegin;
1993   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1994   if (rr) {
1995     PetscCall(VecGetLocalSize(rr,&s1));
1996     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1997     /* Overlap communication with computation. */
1998     PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1999   }
2000   if (ll) {
2001     PetscCall(VecGetLocalSize(ll,&s1));
2002     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2003     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
2004   }
2005   /* scale  the diagonal block */
2006   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
2007 
2008   if (rr) {
2009     /* Do a scatter end and then right scale the off-diagonal block */
2010     PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
2011     PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2017 {
2018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2019 
2020   PetscFunctionBegin;
2021   PetscCall(MatSetUnfactored(a->A));
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2026 {
2027   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2028   Mat            a,b,c,d;
2029   PetscBool      flg;
2030 
2031   PetscFunctionBegin;
2032   a = matA->A; b = matA->B;
2033   c = matB->A; d = matB->B;
2034 
2035   PetscCall(MatEqual(a,c,&flg));
2036   if (flg) {
2037     PetscCall(MatEqual(b,d,&flg));
2038   }
2039   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2044 {
2045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2046   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2047 
2048   PetscFunctionBegin;
2049   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2050   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2051     /* because of the column compression in the off-processor part of the matrix a->B,
2052        the number of columns in a->B and b->B may be different, hence we cannot call
2053        the MatCopy() directly on the two parts. If need be, we can provide a more
2054        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2055        then copying the submatrices */
2056     PetscCall(MatCopy_Basic(A,B,str));
2057   } else {
2058     PetscCall(MatCopy(a->A,b->A,str));
2059     PetscCall(MatCopy(a->B,b->B,str));
2060   }
2061   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2062   PetscFunctionReturn(0);
2063 }
2064 
/* Default MatSetUp for MPIAIJ: preallocate using PETSc's default per-row nonzero estimate. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2071 
2072 /*
2073    Computes the number of nonzeros per row needed for preallocation when X and Y
2074    have different nonzero structure.
2075 */
2076 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2077 {
2078   PetscInt       i,j,k,nzx,nzy;
2079 
2080   PetscFunctionBegin;
2081   /* Set the number of nonzeros in the new matrix */
2082   for (i=0; i<m; i++) {
2083     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2084     nzx = xi[i+1] - xi[i];
2085     nzy = yi[i+1] - yi[i];
2086     nnz[i] = 0;
2087     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2088       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2089       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2090       nnz[i]++;
2091     }
2092     for (; k<nzy; k++) nnz[i]++;
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  /* NOTE(review): uses rmap->N rather than rmap->n; the blocks passed in by
     MatAXPY_MPIAIJ are sequential, where global and local row counts agree —
     confirm if reused elsewhere */
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  /* count the per-row union of the two nonzero patterns in global numbering */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}
2108 
/*
  MatAXPY_MPIAIJ - Computes Y = a*X + Y.

  With SAME_NONZERO_PATTERN the blockwise fast path is used; with
  SUBSET_NONZERO_PATTERN the generic implementation works in place.  For
  DIFFERENT_NONZERO_PATTERN a new matrix with the union pattern is
  preallocated exactly, filled, and then swapped into Y via MatHeaderMerge.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* per-row counts for the union pattern of the diagonal and off-diagonal blocks */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    /* off-diagonal columns are compressed, so compare in global numbering via garray */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* replace Y's contents with B, keeping Y's PetscObject identity */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2139 
2140 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2141 
2142 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2143 {
2144   PetscFunctionBegin;
2145   if (PetscDefined(USE_COMPLEX)) {
2146     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2147 
2148     PetscCall(MatConjugate_SeqAIJ(aij->A));
2149     PetscCall(MatConjugate_SeqAIJ(aij->B));
2150   }
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2155 {
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2157 
2158   PetscFunctionBegin;
2159   PetscCall(MatRealPart(a->A));
2160   PetscCall(MatRealPart(a->B));
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2165 {
2166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(MatImaginaryPart(a->A));
2170   PetscCall(MatImaginaryPart(a->B));
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2175 {
2176   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2177   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2178   PetscScalar       *va,*vv;
2179   Vec               vB,vA;
2180   const PetscScalar *vb;
2181 
2182   PetscFunctionBegin;
2183   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
2184   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
2185 
2186   PetscCall(VecGetArrayWrite(vA,&va));
2187   if (idx) {
2188     for (i=0; i<m; i++) {
2189       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2190     }
2191   }
2192 
2193   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
2194   PetscCall(PetscMalloc1(m,&idxb));
2195   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
2196 
2197   PetscCall(VecGetArrayWrite(v,&vv));
2198   PetscCall(VecGetArrayRead(vB,&vb));
2199   for (i=0; i<m; i++) {
2200     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2201       vv[i] = vb[i];
2202       if (idx) idx[i] = a->garray[idxb[i]];
2203     } else {
2204       vv[i] = va[i];
2205       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2206         idx[i] = a->garray[idxb[i]];
2207     }
2208   }
2209   PetscCall(VecRestoreArrayWrite(vA,&vv));
2210   PetscCall(VecRestoreArrayWrite(vA,&va));
2211   PetscCall(VecRestoreArrayRead(vB,&vb));
2212   PetscCall(PetscFree(idxb));
2213   PetscCall(VecDestroy(&vA));
2214   PetscCall(VecDestroy(&vB));
2215   PetscFunctionReturn(0);
2216 }
2217 
/*
  MatGetRowMinAbs_MPIAIJ - for each local row of A, compute the entry of
  smallest absolute value in v and (optionally) its global column index in
  idx[].

  The result is merged from the local diagonal block mat->A and the
  off-diagonal block mat->B.  B stores compressed columns (cmap = mat->garray
  maps B's local column numbers to global columns), so any off-diagonal global
  column missing from a row of B is an implicit 0.0 -- the smallest possible
  magnitude -- and the code searches for the first such "hole" so it can
  report that column index.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal block: delegate to the sequential routine, writing
       straight into v's array through a wrapper vector */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMinAbs(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: every row is entirely implicit zeros */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the row holds an implicit 0.0 whose magnitude cannot be beaten */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the tests below compare the within-row position j
         against the global column offset cstart; this matches the sibling
         RowMin/RowMax routines but looks suspicious -- verify */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of the row for a smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2325 
/*
  MatGetRowMin_MPIAIJ - for each local row of A, compute the minimum entry
  (compared by real part) in v and (optionally) its global column index in
  idx[].

  Merged from the local diagonal block mat->A and the off-diagonal block
  mat->B; since B stores only explicit nonzeros, a global column absent from
  a row of B is an implicit 0.0 candidate, and the code locates the first
  such "hole" to report its column index.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal block: delegate to the sequential routine, writing
       straight into v's array through a wrapper vector */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of min (PETSC_MAX_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the implicit 0.0 means the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the tests below compare the within-row position j
         against the global column offset cstart; this matches the sibling
         RowMinAbs/RowMax routines but looks suspicious -- verify */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of the row for a smaller value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2433 
/*
  MatGetRowMax_MPIAIJ - for each local row of A, compute the maximum entry
  (compared by real part) in v and (optionally) its global column index in
  idx[].

  Merged from the local diagonal block mat->A and the off-diagonal block
  mat->B; since B stores only explicit nonzeros, a global column absent from
  a row of B is an implicit 0.0 candidate, and the code locates the first
  such "hole" to report its column index.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal block: delegate to the sequential routine, writing
       straight into v's array through a wrapper vector */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of max (PETSC_MIN_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the tests below compare the within-row position j
         against the global column offset cstart; this matches the sibling
         RowMin/RowMinAbs routines but looks suspicious -- verify */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the owned (diagonal) column range */
        }
      }
    }

    /* scan the explicit entries of the row for a larger value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal candidates; ties go to the smaller column index */
  PetscCall(VecGetArrayWrite(v,    &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v,       &a));
  PetscCall(VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2541 
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2543 {
2544   Mat            *dummy;
2545 
2546   PetscFunctionBegin;
2547   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
2548   *newmat = *dummy;
2549   PetscCall(PetscFree(dummy));
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2554 {
2555   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2556 
2557   PetscFunctionBegin;
2558   PetscCall(MatInvertBlockDiagonal(a->A,values));
2559   A->factorerrortype = a->A->factorerrortype;
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2564 {
2565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2566 
2567   PetscFunctionBegin;
2568   PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2569   PetscCall(MatSetRandom(aij->A,rctx));
2570   if (x->assembled) {
2571     PetscCall(MatSetRandom(aij->B,rctx));
2572   } else {
2573     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
2574   }
2575   PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
2576   PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2581 {
2582   PetscFunctionBegin;
2583   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2584   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 /*@
2589    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2590 
2591    Not collective
2592 
2593    Input Parameter:
2594 .    A - the matrix
2595 
2596    Output Parameter:
2597 .    nz - the number of nonzeros
2598 
2599  Level: advanced
2600 
2601 @*/
2602 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
2603 {
2604   Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
2605   Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;
2606 
2607   PetscFunctionBegin;
2608   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 /*@
2613    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2614 
2615    Collective on Mat
2616 
2617    Input Parameters:
2618 +    A - the matrix
2619 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2620 
2621  Level: advanced
2622 
2623 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if the matrix provides one;
     silently a no-op for matrix types without the composed method */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}
2630 
/* Process MPIAIJ-specific options from the options database. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool            sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  /* default shown to the user reflects the currently installed overlap routine */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  /* only change the routine when the option was explicitly given */
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
2643 
/* Y <- Y + a*I.  Ensures the diagonal block has room for the diagonal before
   delegating to the generic shift implementation. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* one nonzero per row is enough to hold the shifted diagonal */
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* MatSeqAIJSetPreallocation resets nonew; preserve the caller's setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}
2660 
2661 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2662 {
2663   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2664 
2665   PetscFunctionBegin;
2666   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2667   PetscCall(MatMissingDiagonal(a->A,missing,d));
2668   if (d) {
2669     PetscInt rstart;
2670     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2671     *d += rstart;
2672 
2673   }
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2678 {
2679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2680 
2681   PetscFunctionBegin;
2682   PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIAIJ.  Slot positions correspond to the MatOperation
   numbering in petsc/private/matimpl.h (the numeric comments mark every fifth
   slot); NULL slots are either unsupported for this type or handled by
   type-independent defaults. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ
};
2838 
2839 /* ----------------------------------------------------------------------------------------*/
2840 
2841 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2842 {
2843   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2844 
2845   PetscFunctionBegin;
2846   PetscCall(MatStoreValues(aij->A));
2847   PetscCall(MatStoreValues(aij->B));
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2852 {
2853   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2854 
2855   PetscFunctionBegin;
2856   PetscCall(MatRetrieveValues(aij->A));
2857   PetscCall(MatRetrieveValues(aij->B));
2858   PetscFunctionReturn(0);
2859 }
2860 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - type-specific implementation behind
  MatMPIAIJSetPreallocation().

  d_nz/d_nnz preallocate the local diagonal block b->A and o_nz/o_nnz the
  off-diagonal block b->B.  Any previously built column map, ghost-column
  array and communication scatter are discarded; they are rebuilt during the
  next assembly.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* drop stale assembly artifacts (recomputed on the next assembly) */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single rank there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block keeps its sizes, so it is only created the first time */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2904 
/* Return an already-preallocated MPIAIJ matrix to its freshly-preallocated
   state, discarding assembled data structures while keeping the existing
   nonzero capacity of both local blocks. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* drop the column map, ghost list and scatter; rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2931 
/* Duplicate an MPIAIJ matrix: create a new matrix of the same type/sizes and
   copy (or not, depending on cpvalues) the numerical data of the diagonal and
   off-diagonal sequential blocks, together with the cached column map,
   local work vector, and scatter context. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  /* MatSetType() allocates mat->data; it must precede the cast below */
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is never shared with the original */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share (reference-count) the layouts rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* deep-copy the list of global indices of off-diagonal columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray)); /* len+1 so the allocation is valid even when len == 0 */
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  /* duplicate the two sequential blocks; cpvalues controls value copying */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2997 
2998 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2999 {
3000   PetscBool      isbinary, ishdf5;
3001 
3002   PetscFunctionBegin;
3003   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3004   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3005   /* force binary viewer to load .info file if it has not yet done so */
3006   PetscCall(PetscViewerSetUp(viewer));
3007   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3008   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
3009   if (isbinary) {
3010     PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
3011   } else if (ishdf5) {
3012 #if defined(PETSC_HAVE_HDF5)
3013     PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
3014 #else
3015     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3016 #endif
3017   } else {
3018     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3019   }
3020   PetscFunctionReturn(0);
3021 }
3022 
/* Load an MPIAIJ matrix from a PETSc binary viewer.  The on-disk format is
   a 4-entry header [MAT_FILE_CLASSID, M, N, nz] followed by per-row nonzero
   counts, then all column indices, then all values.  All reads below are
   collective over the viewer's communicator; their order must not change. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special (e.g. dense/symmetric) on-disk layout this reader does not handle */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum converts per-row counts into CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the distributed row counts must add up to the header's nz */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}
3069 
3070 /* Not scalable because of ISAllGather() unless getting all columns. */
3071 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3072 {
3073   IS             iscol_local;
3074   PetscBool      isstride;
3075   PetscMPIInt    lisstride=0,gisstride;
3076 
3077   PetscFunctionBegin;
3078   /* check if we are grabbing all columns*/
3079   PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));
3080 
3081   if (isstride) {
3082     PetscInt  start,len,mstart,mlen;
3083     PetscCall(ISStrideGetInfo(iscol,&start,NULL));
3084     PetscCall(ISGetLocalSize(iscol,&len));
3085     PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
3086     if (mstart == start && mlen-mstart == len) lisstride = 1;
3087   }
3088 
3089   PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
3090   if (gisstride) {
3091     PetscInt N;
3092     PetscCall(MatGetSize(mat,NULL,&N));
3093     PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
3094     PetscCall(ISSetIdentity(iscol_local));
3095     PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3096   } else {
3097     PetscInt cbs;
3098     PetscCall(ISGetBlockSize(iscol,&cbs));
3099     PetscCall(ISAllGather(iscol,&iscol_local));
3100     PetscCall(ISSetBlockSize(iscol_local,cbs));
3101   }
3102 
3103   *isseq = iscol_local;
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 /*
3108  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3109  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3110 
3111  Input Parameters:
3112    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3115    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3116            i.e., mat->cstart <= iscol[i] < mat->cend
3117  Output Parameter:
3118    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3119    iscol_o - sequential column index set for retrieving mat->B
3120    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3121  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks selected columns (entry >= 0) while cmap records each selected
     column's global position within iscol; -1 means "not selected" */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of iscol entries on lower ranks */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER): do not free here */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i)); /* i reused as scratch for the block size */
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; /* shift global rows to local */
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* entry was marked, i.e. column i of B is selected */
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 (length Bn, count entries used) is returned to the caller, which
     is responsible for freeing it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3218 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* The index sets isrow_d/iscol_d/iscol_o needed for reuse are stashed on the
   submatrix with PetscObjectCompose() so a later MAT_REUSE_MATRIX call can
   retrieve them without recomputation. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) { /* empty iscol_o means this rank has no off-diagonal entries to refresh */
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; Asub/Bsub are consumed by this call */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* walk subgarray (condensed) against garray (original selection),
         both sorted, keeping only iscol_o entries whose column survived */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* composing takes a reference, so the local handles can be destroyed */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3313 
/* Extract the submatrix mat[isrow, iscol], choosing the most scalable of three
   implementations based on how isrow/iscol are distributed relative to mat:
   SameRowColDist (fastest), SameRowDist, or the general nonscalable fallback. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed IS names identify which code path created *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) { /* all local indices fall inside this rank's row range */
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree (logical AND) on the distribution properties */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) { /* may already exist from the unsorted SameRowDist fall-through */
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so a reuse call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3417 
3418 /*@C
3419      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3420          and "off-diagonal" part of the matrix in CSR format.
3421 
3422    Collective
3423 
3424    Input Parameters:
3425 +  comm - MPI communicator
3426 .  A - "diagonal" portion of matrix
3427 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3428 -  garray - global index of B columns
3429 
   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3433 
3434    Notes:
3435        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3436        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3437 
3438 .seealso: `MatCreateMPIAIJWithSplitArrays()`
3439 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;
  MatType           mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
  PetscCall(MatSetType(*mat,mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* mark preallocated so assembly below does not attempt to preallocate */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  /* ownership of A transfers to *mat; the caller must not use A afterwards */
  maij->A = A;

  /* translate B's local column indices to global ones, in place, via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* hand the shared arrays' ownership to Bnew: clear B's free flags so
     MatDestroy(&B) below does not release them ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and set Bnew's flags so it frees them when it is destroyed */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3510 
3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3512 
3513 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3514 {
3515   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3516   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3517   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3518   Mat            M,Msub,B=a->B;
3519   MatScalar      *aa;
3520   Mat_SeqAIJ     *aij;
3521   PetscInt       *garray = a->garray,*colsub,Ncols;
3522   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3523   IS             iscol_sub,iscmap;
3524   const PetscInt *is_idx,*cmap;
3525   PetscBool      allcolumns=PETSC_FALSE;
3526   MPI_Comm       comm;
3527 
3528   PetscFunctionBegin;
3529   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
3530   if (call == MAT_REUSE_MATRIX) {
3531     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
3532     PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3533     PetscCall(ISGetLocalSize(iscol_sub,&count));
3534 
3535     PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
3536     PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3537 
3538     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
3539     PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3540 
3541     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));
3542 
3543   } else { /* call == MAT_INITIAL_MATRIX) */
3544     PetscBool flg;
3545 
3546     PetscCall(ISGetLocalSize(iscol,&n));
3547     PetscCall(ISGetSize(iscol,&Ncols));
3548 
3549     /* (1) iscol -> nonscalable iscol_local */
3550     /* Check for special case: each processor gets entire matrix columns */
3551     PetscCall(ISIdentity(iscol_local,&flg));
3552     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3553     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
3554     if (allcolumns) {
3555       iscol_sub = iscol_local;
3556       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3557       PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));
3558 
3559     } else {
3560       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3561       PetscInt *idx,*cmap1,k;
3562       PetscCall(PetscMalloc1(Ncols,&idx));
3563       PetscCall(PetscMalloc1(Ncols,&cmap1));
3564       PetscCall(ISGetIndices(iscol_local,&is_idx));
3565       count = 0;
3566       k     = 0;
3567       for (i=0; i<Ncols; i++) {
3568         j = is_idx[i];
3569         if (j >= cstart && j < cend) {
3570           /* diagonal part of mat */
3571           idx[count]     = j;
3572           cmap1[count++] = i; /* column index in submat */
3573         } else if (Bn) {
3574           /* off-diagonal part of mat */
3575           if (j == garray[k]) {
3576             idx[count]     = j;
3577             cmap1[count++] = i;  /* column index in submat */
3578           } else if (j > garray[k]) {
3579             while (j > garray[k] && k < Bn-1) k++;
3580             if (j == garray[k]) {
3581               idx[count]     = j;
3582               cmap1[count++] = i; /* column index in submat */
3583             }
3584           }
3585         }
3586       }
3587       PetscCall(ISRestoreIndices(iscol_local,&is_idx));
3588 
3589       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
3590       PetscCall(ISGetBlockSize(iscol,&cbs));
3591       PetscCall(ISSetBlockSize(iscol_sub,cbs));
3592 
3593       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
3594     }
3595 
3596     /* (3) Create sequential Msub */
3597     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
3598   }
3599 
3600   PetscCall(ISGetLocalSize(iscol_sub,&count));
3601   aij  = (Mat_SeqAIJ*)(Msub)->data;
3602   ii   = aij->i;
3603   PetscCall(ISGetIndices(iscmap,&cmap));
3604 
3605   /*
3606       m - number of local rows
3607       Ncols - number of columns (same on all processors)
3608       rstart - first row in new global matrix generated
3609   */
3610   PetscCall(MatGetSize(Msub,&m,NULL));
3611 
3612   if (call == MAT_INITIAL_MATRIX) {
3613     /* (4) Create parallel newmat */
3614     PetscMPIInt    rank,size;
3615     PetscInt       csize;
3616 
3617     PetscCallMPI(MPI_Comm_size(comm,&size));
3618     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3619 
3620     /*
3621         Determine the number of non-zeros in the diagonal and off-diagonal
3622         portions of the matrix in order to do correct preallocation
3623     */
3624 
3625     /* first get start and end of "diagonal" columns */
3626     PetscCall(ISGetLocalSize(iscol,&csize));
3627     if (csize == PETSC_DECIDE) {
3628       PetscCall(ISGetSize(isrow,&mglobal));
3629       if (mglobal == Ncols) { /* square matrix */
3630         nlocal = m;
3631       } else {
3632         nlocal = Ncols/size + ((Ncols % size) > rank);
3633       }
3634     } else {
3635       nlocal = csize;
3636     }
3637     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
3638     rstart = rend - nlocal;
3639     PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3640 
3641     /* next, compute all the lengths */
3642     jj    = aij->j;
3643     PetscCall(PetscMalloc1(2*m+1,&dlens));
3644     olens = dlens + m;
3645     for (i=0; i<m; i++) {
3646       jend = ii[i+1] - ii[i];
3647       olen = 0;
3648       dlen = 0;
3649       for (j=0; j<jend; j++) {
3650         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3651         else dlen++;
3652         jj++;
3653       }
3654       olens[i] = olen;
3655       dlens[i] = dlen;
3656     }
3657 
3658     PetscCall(ISGetBlockSize(isrow,&bs));
3659     PetscCall(ISGetBlockSize(iscol,&cbs));
3660 
3661     PetscCall(MatCreate(comm,&M));
3662     PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
3663     PetscCall(MatSetBlockSizes(M,bs,cbs));
3664     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
3665     PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
3666     PetscCall(PetscFree(dlens));
3667 
3668   } else { /* call == MAT_REUSE_MATRIX */
3669     M    = *newmat;
3670     PetscCall(MatGetLocalSize(M,&i,NULL));
3671     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3672     PetscCall(MatZeroEntries(M));
3673     /*
3674          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3675        rather than the slower MatSetValues().
3676     */
3677     M->was_assembled = PETSC_TRUE;
3678     M->assembled     = PETSC_FALSE;
3679   }
3680 
3681   /* (5) Set values of Msub to *newmat */
3682   PetscCall(PetscMalloc1(count,&colsub));
3683   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3684 
3685   jj   = aij->j;
3686   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3687   for (i=0; i<m; i++) {
3688     row = rstart + i;
3689     nz  = ii[i+1] - ii[i];
3690     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3691     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3692     jj += nz; aa += nz;
3693   }
3694   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3695   PetscCall(ISRestoreIndices(iscmap,&cmap));
3696 
3697   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3698   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3699 
3700   PetscCall(PetscFree(colsub));
3701 
3702   /* save Msub, iscol_sub and iscmap used in processor for next request */
3703   if (call == MAT_INITIAL_MATRIX) {
3704     *newmat = M;
3705     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3706     PetscCall(MatDestroy(&Msub));
3707 
3708     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3709     PetscCall(ISDestroy(&iscol_sub));
3710 
3711     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3712     PetscCall(ISDestroy(&iscmap));
3713 
3714     if (iscol_local) {
3715       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3716       PetscCall(ISDestroy(&iscol_local));
3717     }
3718   }
3719   PetscFunctionReturn(0);
3720 }
3721 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  matrix on each process, and then the final result obtained by concatenating
  those local matrices. Writing it directly would be much like
  MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices.
*/
/*
   Extracts the parallel submatrix mat[isrow,iscol]: each process first builds a
   sequential submatrix (Mreuse) holding all of its requested rows, then the
   entries are inserted into a new parallel matrix M via MatSetValues_MPIAIJ().

   csize  - requested local column size of the result, or PETSC_DECIDE
   call   - MAT_INITIAL_MATRIX creates *newmat; MAT_REUSE_MATRIX refills a matrix
            previously returned by this routine (Mreuse is recovered from the
            "SubMatrix" composed object)
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* allcolumns must hold on every process for the optimization to be usable */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this process's column range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;    /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* classify each entry of the row: inside [rstart,rend) is "diagonal" */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert each CSR row of the sequential submatrix into the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  /* NOTE(review): aa was advanced past the start of the array in the loop above;
     this appears to assume the restore routine does not validate the pointer value - confirm */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3853 
3854 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3855 {
3856   PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
3857   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3858   const PetscInt *JJ;
3859   PetscBool      nooffprocentries;
3860   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;
3861 
3862   PetscFunctionBegin;
3863   PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3864 
3865   PetscCall(PetscLayoutSetUp(B->rmap));
3866   PetscCall(PetscLayoutSetUp(B->cmap));
3867   m      = B->rmap->n;
3868   cstart = B->cmap->rstart;
3869   cend   = B->cmap->rend;
3870   rstart = B->rmap->rstart;
3871 
3872   PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));
3873 
3874   if (PetscDefined(USE_DEBUG)) {
3875     for (i=0; i<m; i++) {
3876       nnz = Ii[i+1]- Ii[i];
3877       JJ  = J + Ii[i];
3878       PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3879       PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3880       PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3881     }
3882   }
3883 
3884   for (i=0; i<m; i++) {
3885     nnz     = Ii[i+1]- Ii[i];
3886     JJ      = J + Ii[i];
3887     nnz_max = PetscMax(nnz_max,nnz);
3888     d       = 0;
3889     for (j=0; j<nnz; j++) {
3890       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3891     }
3892     d_nnz[i] = d;
3893     o_nnz[i] = nnz - d;
3894   }
3895   PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
3896   PetscCall(PetscFree2(d_nnz,o_nnz));
3897 
3898   for (i=0; i<m; i++) {
3899     ii   = i + rstart;
3900     PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
3901   }
3902   nooffprocentries    = B->nooffprocentries;
3903   B->nooffprocentries = PETSC_TRUE;
3904   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3905   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3906   B->nooffprocentries = nooffprocentries;
3907 
3908   /* count number of entries below block diagonal */
3909   PetscCall(PetscFree(Aij->ld));
3910   PetscCall(PetscCalloc1(m,&ld));
3911   Aij->ld = ld;
3912   for (i=0; i<m; i++) {
3913     nnz  = Ii[i+1] - Ii[i];
3914     j     = 0;
3915     while  (j < nnz && J[j] < cstart) {j++;}
3916     ld[i] = j;
3917     J     += nnz;
3918   }
3919 
3920   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3921   PetscFunctionReturn(0);
3922 }
3923 
3924 /*@
3925    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3926    (the default parallel PETSc format).
3927 
3928    Collective
3929 
3930    Input Parameters:
3931 +  B - the matrix
3932 .  i - the indices into j for the start of each local row (starts with zero)
3933 .  j - the column indices for each local row (starts with zero)
3934 -  v - optional values in the matrix
3935 
3936    Level: developer
3937 
3938    Notes:
3939        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3940      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3941      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3942 
3943        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3944 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown
3948 
3949 $        1 0 0
3950 $        2 0 3     P0
3951 $       -------
3952 $        4 5 6     P1
3953 $
3954 $     Process0 [P0]: rows_owned=[0,1]
3955 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3956 $        j =  {0,0,2}  [size = 3]
3957 $        v =  {1,2,3}  [size = 3]
3958 $
3959 $     Process1 [P1]: rows_owned=[2]
3960 $        i =  {0,3}    [size = nrow+1  = 1+1]
3961 $        j =  {0,1,2}  [size = 3]
3962 $        v =  {4,5,6}  [size = 3]
3963 
3964 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3965           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3966 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed under
     "MatMPIAIJSetPreallocationCSR_C" (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op if the matrix type does not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}
3973 
3974 /*@C
3975    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3976    (the default parallel PETSc format).  For good matrix assembly performance
3977    the user should preallocate the matrix storage by setting the parameters
3978    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3979    performance can be increased by more than a factor of 50.
3980 
3981    Collective
3982 
3983    Input Parameters:
3984 +  B - the matrix
3985 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3986            (same value is used for all local rows)
3987 .  d_nnz - array containing the number of nonzeros in the various rows of the
3988            DIAGONAL portion of the local submatrix (possibly different for each row)
3989            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3990            The size of this array is equal to the number of local rows, i.e 'm'.
3991            For matrices that will be factored, you must leave room for (and set)
3992            the diagonal entry even if it is zero.
3993 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3994            submatrix (same value is used for all local rows).
3995 -  o_nnz - array containing the number of nonzeros in the various rows of the
3996            OFF-DIAGONAL portion of the local submatrix (possibly different for
3997            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3998            structure. The size of this array is equal to the number
3999            of local rows, i.e 'm'.
4000 
4001    If the *_nnz parameter is given then the *_nz parameter is ignored
4002 
4003    The AIJ format (also called the Yale sparse matrix format or
4004    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4005    storage.  The stored row and column indices begin with zero.
4006    See Users-Manual: ch_mat for details.
4007 
4008    The parallel matrix is partitioned such that the first m0 rows belong to
4009    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4010    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4011 
4012    The DIAGONAL portion of the local submatrix of a processor can be defined
4013    as the submatrix which is obtained by extraction the part corresponding to
4014    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4015    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
4017    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4018    common case of a square matrix, the row and column ranges are the same and
4019    the DIAGONAL part is also square. The remaining portion of the local
4020    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4021 
4022    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4023 
4024    You can call MatGetInfo() to get information on how effective the preallocation was;
4025    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4026    You can also run with the option -info and look for messages with the string
4027    malloc in them to see if additional memory allocation was needed.
4028 
4029    Example usage:
4030 
4031    Consider the following 8x8 matrix with 34 non-zero values, that is
4032    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4033    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4034    as follows:
4035 
4036 .vb
4037             1  2  0  |  0  3  0  |  0  4
4038     Proc0   0  5  6  |  7  0  0  |  8  0
4039             9  0 10  | 11  0  0  | 12  0
4040     -------------------------------------
4041            13  0 14  | 15 16 17  |  0  0
4042     Proc1   0 18  0  | 19 20 21  |  0  0
4043             0  0  0  | 22 23  0  | 24  0
4044     -------------------------------------
4045     Proc2  25 26 27  |  0  0 28  | 29  0
4046            30  0  0  | 31 32 33  |  0 34
4047 .ve
4048 
4049    This can be represented as a collection of submatrices as:
4050 
4051 .vb
4052       A B C
4053       D E F
4054       G H I
4055 .ve
4056 
4057    Where the submatrices A,B,C are owned by proc0, D,E,F are
4058    owned by proc1, G,H,I are owned by proc2.
4059 
4060    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4061    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4062    The 'M','N' parameters are 8,8, and have the same values on all procs.
4063 
4064    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4065    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4066    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4067    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4068    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4069    matrix, ans [DF] as another SeqAIJ matrix.
4070 
4071    When d_nz, o_nz parameters are specified, d_nz storage elements are
4072    allocated for every row of the local diagonal submatrix, and o_nz
4073    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4076    In this case, the values of d_nz,o_nz are:
4077 .vb
4078      proc0 : dnz = 2, o_nz = 2
4079      proc1 : dnz = 3, o_nz = 2
4080      proc2 : dnz = 1, o_nz = 4
4081 .ve
4082    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4085    34 values.
4086 
4087    When d_nnz, o_nnz parameters are specified, the storage is specified
4088    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4089    In the above case the values for d_nnz,o_nnz are:
4090 .vb
4091      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4092      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4093      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4094 .ve
4095    Here the space allocated is sum of all the above values i.e 34, and
4096    hence pre-allocation is perfect.
4097 
4098    Level: intermediate
4099 
4100 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4101           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4102 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1); /* B must be a valid Mat object */
  PetscValidType(B,1);                       /* ... whose type has already been set */
  /* Dispatch to the type-specific implementation composed under
     "MatMPIAIJSetPreallocation_C"; PetscTryMethod is a no-op for types that
     do not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
4111 
4112 /*@
4113      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4114          CSR format for the local rows.
4115 
4116    Collective
4117 
4118    Input Parameters:
4119 +  comm - MPI communicator
4120 .  m - number of local rows (Cannot be PETSC_DECIDE)
4121 .  n - This value should be the same as the local size used in creating the
4122        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4123        calculated if N is given) For square matrices n is almost always m.
4124 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4125 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4126 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4127 .   j - column indices
4128 -   a - optional matrix values
4129 
4130    Output Parameter:
4131 .   mat - the matrix
4132 
4133    Level: intermediate
4134 
4135    Notes:
4136        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4137      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4138      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4139 
4140        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4141 
       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown
4145 
4146        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4147 
4148 $        1 0 0
4149 $        2 0 3     P0
4150 $       -------
4151 $        4 5 6     P1
4152 $
4153 $     Process0 [P0]: rows_owned=[0,1]
4154 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4155 $        j =  {0,0,2}  [size = 3]
4156 $        v =  {1,2,3}  [size = 3]
4157 $
4158 $     Process1 [P1]: rows_owned=[2]
4159 $        i =  {0,3}    [size = nrow+1  = 1+1]
4160 $        j =  {0,1,2}  [size = 3]
4161 $        v =  {4,5,6}  [size = 3]
4162 
4163 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4164           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4165 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* i, if provided, must be a 0-based CSR row-offset array, so i[0] must be 0 */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* copies i,j,a into the matrix's internal storage; the caller's arrays are
     not referenced afterwards */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}
4178 
4179 /*@
4180      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
         CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical to what was passed to MatCreateMPIAIJWithArrays()
4182 
4183      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4184 
4185    Collective
4186 
4187    Input Parameters:
4188 +  mat - the matrix
4189 .  m - number of local rows (Cannot be PETSC_DECIDE)
4190 .  n - This value should be the same as the local size used in creating the
4191        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4192        calculated if N is given) For square matrices n is almost always m.
4193 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4194 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4195 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4196 .  J - column indices
4197 -  v - matrix values
4198 
4199    Level: intermediate
4200 
4201 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4202           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4203 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       nnz,i;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data; /* diagonal block of the MPIAIJ matrix */
  PetscScalar    *ad,*ao;                          /* value arrays of the diagonal/off-diagonal blocks */
  PetscInt       ldi,Iii,md;
  const PetscInt *Adi = Ad->i;                     /* CSR row offsets of the diagonal block */
  PetscInt       *ld = Aij->ld;                    /* per row: # of off-diag entries left of the diagonal block,
                                                      recorded by MatMPIAIJSetPreallocationCSR_MPIAIJ() */

  PetscFunctionBegin;
  /* J, M, and N are accepted for interface symmetry with MatCreateMPIAIJWithArrays()
     but are not read here; the nonzero pattern must be unchanged from creation */
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));

  for (i=0; i<m; i++) {
    nnz  = Ii[i+1]- Ii[i];  /* total entries of row i in v */
    Iii  = Ii[i];           /* offset of row i in v */
    ldi  = ld[i];           /* entries of row i left of the diagonal block */
    md   = Adi[i+1]-Adi[i]; /* entries of row i inside the diagonal block */
    /* each row of v is laid out as [left off-diag | diagonal block | right off-diag]
       (assumes sorted column indices, as required at creation) */
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad  += md;       /* advance destination pointers to the next row */
    ao  += nnz - md;
  }
  /* values changed in place, so assembly has nothing to communicate */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  /* bump object states so dependent objects notice the new values */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4247 
4248 /*@
4249      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4250 
4251    Collective
4252 
4253    Input Parameters:
4254 +  mat - the matrix
4255 -  v - matrix values, stored by row
4256 
4257    Level: intermediate
4258 
4259    Notes:
4260    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4261 
4262 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4264 @*/
4265 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
4266 {
4267   PetscInt       nnz,i,m;
4268   PetscBool      nooffprocentries;
4269   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4270   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4271   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4272   PetscScalar    *ad,*ao;
4273   const PetscInt *Adi = Ad->i,*Adj = Ao->i;
4274   PetscInt       ldi,Iii,md;
4275   PetscInt       *ld = Aij->ld;
4276 
4277   PetscFunctionBegin;
4278   m = mat->rmap->n;
4279 
4280   PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
4281   PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
4282   Iii = 0;
4283   for (i=0; i<m; i++) {
4284     nnz  = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
4285     ldi  = ld[i];
4286     md   = Adi[i+1]-Adi[i];
4287     PetscCall(PetscArraycpy(ao,v + Iii,ldi));
4288     PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
4289     PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
4290     ad  += md;
4291     ao  += nnz - md;
4292     Iii += nnz;
4293   }
4294   nooffprocentries      = mat->nooffprocentries;
4295   mat->nooffprocentries = PETSC_TRUE;
4296   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
4297   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
4298   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4299   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4300   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4301   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
4302   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
4303   mat->nooffprocentries = nooffprocentries;
4304   PetscFunctionReturn(0);
4305 }
4306 
4307 /*@C
4308    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4309    (the default parallel PETSc format).  For good matrix assembly performance
4310    the user should preallocate the matrix storage by setting the parameters
4311    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4312    performance can be increased by more than a factor of 50.
4313 
4314    Collective
4315 
4316    Input Parameters:
4317 +  comm - MPI communicator
4318 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4319            This value should be the same as the local size used in creating the
4320            y vector for the matrix-vector product y = Ax.
4321 .  n - This value should be the same as the local size used in creating the
4322        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4323        calculated if N is given) For square matrices n is almost always m.
4324 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4325 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4326 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4327            (same value is used for all local rows)
4328 .  d_nnz - array containing the number of nonzeros in the various rows of the
4329            DIAGONAL portion of the local submatrix (possibly different for each row)
4330            or NULL, if d_nz is used to specify the nonzero structure.
4331            The size of this array is equal to the number of local rows, i.e 'm'.
4332 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4333            submatrix (same value is used for all local rows).
4334 -  o_nnz - array containing the number of nonzeros in the various rows of the
4335            OFF-DIAGONAL portion of the local submatrix (possibly different for
4336            each row) or NULL, if o_nz is used to specify the nonzero
4337            structure. The size of this array is equal to the number
4338            of local rows, i.e 'm'.
4339 
4340    Output Parameter:
4341 .  A - the matrix
4342 
4343    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4344    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4345    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4346 
4347    Notes:
4348    If the *_nnz parameter is given then the *_nz parameter is ignored
4349 
4350    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4351    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4352    storage requirements for this matrix.
4353 
4354    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4355    processor then it must be used on all processors that share the object for
4356    that argument.
4357 
4358    The user MUST specify either the local or global matrix dimensions
4359    (possibly both).
4360 
4361    The parallel matrix is partitioned across processors such that the
4362    first m0 rows belong to process 0, the next m1 rows belong to
4363    process 1, the next m2 rows belong to process 2 etc.. where
4364    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4365    values corresponding to [m x N] submatrix.
4366 
4367    The columns are logically partitioned with the n0 columns belonging
4368    to 0th partition, the next n1 columns belonging to the next
4369    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4370 
4371    The DIAGONAL portion of the local submatrix on any given processor
4372    is the submatrix corresponding to the rows and columns m,n
4373    corresponding to the given processor. i.e diagonal matrix on
4374    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4375    etc. The remaining portion of the local submatrix [m x (N-n)]
4376    constitute the OFF-DIAGONAL portion. The example below better
4377    illustrates this concept.
4378 
4379    For a square global matrix we define each processor's diagonal portion
4380    to be its local rows and the corresponding columns (a square submatrix);
4381    each processor's off-diagonal portion encompasses the remainder of the
4382    local matrix (a rectangular submatrix).
4383 
4384    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4385 
4386    When calling this routine with a single process communicator, a matrix of
4387    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4388    type of communicator, use the construction mechanism
4389 .vb
4390      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4391 .ve
4392 
4393 $     MatCreate(...,&A);
4394 $     MatSetType(A,MATMPIAIJ);
4395 $     MatSetSizes(A, m,n,M,N);
4396 $     MatMPIAIJSetPreallocation(A,...);
4397 
4398    By default, this format uses inodes (identical nodes) when possible.
4399    We search for consecutive rows with the same nonzero structure, thereby
4400    reusing matrix information to achieve increased efficiency.
4401 
4402    Options Database Keys:
4403 +  -mat_no_inode  - Do not use inodes
4404 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4405 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4406         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4407         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4408 
4409    Example usage:
4410 
4411    Consider the following 8x8 matrix with 34 non-zero values, that is
4412    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4413    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4414    as follows
4415 
4416 .vb
4417             1  2  0  |  0  3  0  |  0  4
4418     Proc0   0  5  6  |  7  0  0  |  8  0
4419             9  0 10  | 11  0  0  | 12  0
4420     -------------------------------------
4421            13  0 14  | 15 16 17  |  0  0
4422     Proc1   0 18  0  | 19 20 21  |  0  0
4423             0  0  0  | 22 23  0  | 24  0
4424     -------------------------------------
4425     Proc2  25 26 27  |  0  0 28  | 29  0
4426            30  0  0  | 31 32 33  |  0 34
4427 .ve
4428 
4429    This can be represented as a collection of submatrices as
4430 
4431 .vb
4432       A B C
4433       D E F
4434       G H I
4435 .ve
4436 
4437    Where the submatrices A,B,C are owned by proc0, D,E,F are
4438    owned by proc1, G,H,I are owned by proc2.
4439 
4440    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4441    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4442    The 'M','N' parameters are 8,8, and have the same values on all procs.
4443 
4444    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4445    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4446    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4447    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4448    part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
4449    matrix, and [DF] as another SeqAIJ matrix.
4450 
4451    When d_nz, o_nz parameters are specified, d_nz storage elements are
4452    allocated for every row of the local diagonal submatrix, and o_nz
4453    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4454    One way to choose d_nz and o_nz is to use the max nonzeros per local
4455    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4456    In this case, the values of d_nz,o_nz are
4457 .vb
4458      proc0 : d_nz = 2, o_nz = 2
4459      proc1 : d_nz = 3, o_nz = 2
4460      proc2 : d_nz = 1, o_nz = 4
4461 .ve
4462    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4463    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4464    for proc2. i.e we are using 12+15+10=37 storage locations to store
4465    34 values.
4466 
4467    When d_nnz, o_nnz parameters are specified, the storage is specified
4468    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4469    In the above case the values for d_nnz,o_nnz are
4470 .vb
4471      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4472      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4473      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4474 .ve
4475    Here the space allocated is sum of all the above values i.e 34, and
4476    hence pre-allocation is perfect.
4477 
4478    Level: intermediate
4479 
4480 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4481           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4482 @*/
4483 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4484 {
4485   PetscMPIInt    size;
4486 
4487   PetscFunctionBegin;
4488   PetscCall(MatCreate(comm,A));
4489   PetscCall(MatSetSizes(*A,m,n,M,N));
4490   PetscCallMPI(MPI_Comm_size(comm,&size));
4491   if (size > 1) {
4492     PetscCall(MatSetType(*A,MATMPIAIJ));
4493     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4494   } else {
4495     PetscCall(MatSetType(*A,MATSEQAIJ));
4496     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4497   }
4498   PetscFunctionReturn(0);
4499 }
4500 
4501 /*@C
4502   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4503 
4504   Not collective
4505 
4506   Input Parameter:
4507 . A - The MPIAIJ matrix
4508 
4509   Output Parameters:
4510 + Ad - The local diagonal block as a SeqAIJ matrix
4511 . Ao - The local off-diagonal block as a SeqAIJ matrix
4512 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4513 
4514   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4515   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4516   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4517   local column numbers to global column numbers in the original matrix.
4518 
4519   Level: intermediate
4520 
4521 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4522 @*/
4523 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4524 {
4525   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4526   PetscBool      flg;
4527 
4528   PetscFunctionBegin;
4529   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
4530   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4531   if (Ad)     *Ad     = a->A;
4532   if (Ao)     *Ao     = a->B;
4533   if (colmap) *colmap = a->garray;
4534   PetscFunctionReturn(0);
4535 }
4536 
/* Builds (or refills) a parallel matrix on comm by stacking the rows of the
   per-process sequential AIJ matrices inmat one process after another; n is the
   number of local columns (or PETSC_DECIDE).  With MAT_INITIAL_MATRIX the
   layout and preallocation are computed first; with MAT_REUSE_MATRIX only the
   numeric phase runs. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* global row offset of this process = sum of local row counts on lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per global row for preallocation */
    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocation routines are called; only the one matching the actual
       matrix type takes effect, the other is a no-op */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    /* rows inserted below are all locally owned, so skip off-process stashing */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into its global row of outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4590 
4591 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4592 {
4593   PetscMPIInt       rank;
4594   PetscInt          m,N,i,rstart,nnz;
4595   size_t            len;
4596   const PetscInt    *indx;
4597   PetscViewer       out;
4598   char              *name;
4599   Mat               B;
4600   const PetscScalar *values;
4601 
4602   PetscFunctionBegin;
4603   PetscCall(MatGetLocalSize(A,&m,NULL));
4604   PetscCall(MatGetSize(A,NULL,&N));
4605   /* Should this be the type of the diagonal block of A? */
4606   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
4607   PetscCall(MatSetSizes(B,m,N,m,N));
4608   PetscCall(MatSetBlockSizesFromMats(B,A,A));
4609   PetscCall(MatSetType(B,MATSEQAIJ));
4610   PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
4611   PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
4612   for (i=0; i<m; i++) {
4613     PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
4614     PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
4615     PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
4616   }
4617   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
4618   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
4619 
4620   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
4621   PetscCall(PetscStrlen(outfile,&len));
4622   PetscCall(PetscMalloc1(len+6,&name));
4623   PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
4624   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
4625   PetscCall(PetscFree(name));
4626   PetscCall(MatView(B,out));
4627   PetscCall(PetscViewerDestroy(&out));
4628   PetscCall(MatDestroy(&B));
4629   PetscFunctionReturn(0);
4630 }
4631 
/* Container destructor for the Mat_Merge_SeqsToMPI support structure that
   MatCreateMPIAIJSumSeqAIJSymbolic() attaches to the parallel matrix; releases
   all communication buffers, the merged ij-structure, and the row layout. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of pointers into a single allocation anchored at
     element [0] (presumably laid out by PetscPostIrecvInt() — see its docs),
     so the data chunk is freed first, then the pointer array */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  /* coi/coj/owners_co may be NULL (set so by the symbolic phase); PetscFree(NULL) is a no-op */
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4654 
4655 #include <../src/mat/utils/freespace.h>
4656 #include <petscbt.h>
4657 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat (previously created
   by MatCreateMPIAIJSumSeqAIJSymbolic()) with the sum, over all processes, of
   the sequential matrices seqmat.  Each process ships the values of the rows it
   does not own to their owners, then every owner accumulates its local values
   plus all received contributions row by row. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge support structure stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;     /* row pointers of the merged local matrix */
  bj     = merge->bj;     /* column indices of the merged local matrix */
  buf_ri = merge->buf_ri; /* received i-structures (row numbers + offsets) */
  buf_rj = merge->buf_rj; /* received column indices */

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values for rows owned by [proc] are contiguous in aa starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i)); /* scratch row of accumulated values; N bounds any row's nnz */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  /* set up a cursor (next row number, next offset) into each received i-structure */
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row number */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge-scan: both bj_i and aj are sorted, so advance j until each acol is matched */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++; /* advance this message's cursor to its next row */
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] anchors the single receive buffer allocation (see PetscPostIrecvScalar()) */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
4777 
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): from the per-process sequential
   matrices seqmat (each M x N, M/N the global sizes) determines the nonzero
   structure of the summed parallel matrix, creates an empty MATMPIAIJ with the
   right preallocation, and attaches a Mat_Merge_SeqsToMPI container holding the
   communication pattern and received ij-structures for reuse by
   MatCreateMPIAIJSumSeqAIJNumeric().  m, n are the local sizes (or PETSC_DECIDE). */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;     /* local row count (resolved if PETSC_DECIDE) */
  owners = merge->rowmap->range; /* owners[p]..owners[p+1]-1 are rank p's rows */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0; /* never send to self; own rows are merged locally */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are listed in the i-structure */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1); /* count + row indices + row offsets (see message layout below) */
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices for [proc]'s rows are contiguous in aj starting at ai[owners[proc]] */
    i    = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) { /* skip empty rows; they carry no structure */
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  /* set up a cursor into each received i-structure (same layout as the sent messages) */
  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  /* flatten the free-space chunks into the final bj array */
  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  /* drop the inner reference taken by PetscCommDuplicate() above */
  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}
5026 
5027 /*@C
5028       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5029                  matrices from each processor
5030 
5031     Collective
5032 
5033    Input Parameters:
5034 +    comm - the communicator the parallel matrix will live on
5035 .    seqmat - the input sequential matrices
5036 .    m - number of local rows (or PETSC_DECIDE)
5037 .    n - number of local columns (or PETSC_DECIDE)
5038 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039 
5040    Output Parameter:
5041 .    mpimat - the parallel matrix generated
5042 
5043     Level: advanced
5044 
5045    Notes:
5046      The dimensions of the sequential matrix in each processor MUST be the same.
5047      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5048      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5049 @*/
5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5051 {
5052   PetscMPIInt    size;
5053 
5054   PetscFunctionBegin;
5055   PetscCallMPI(MPI_Comm_size(comm,&size));
5056   if (size == 1) {
5057     PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5058     if (scall == MAT_INITIAL_MATRIX) {
5059       PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
5060     } else {
5061       PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
5062     }
5063     PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5064     PetscFunctionReturn(0);
5065   }
5066   PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
5067   if (scall == MAT_INITIAL_MATRIX) {
5068     PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
5069   }
5070   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
5071   PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
5072   PetscFunctionReturn(0);
5073 }
5074 
5075 /*@
5076      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5077           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5078           with MatGetSize()
5079 
5080     Not Collective
5081 
   Input Parameter:
.    A - the matrix
5085 
5086    Output Parameter:
5087 .    A_loc - the local sequential matrix generated
5088 
5089     Level: developer
5090 
5091    Notes:
5092      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5093 
5094      Destroy the matrix with MatDestroy()
5095 
5096 .seealso: MatMPIAIJGetLocalMat()
5097 
5098 @*/
5099 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
5100 {
5101   PetscBool      mpi;
5102 
5103   PetscFunctionBegin;
5104   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
5105   if (mpi) {
5106     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
5107   } else {
5108     *A_loc = A;
5109     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5110   }
5111   PetscFunctionReturn(0);
5112 }
5113 
5114 /*@
5115      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5116           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5117           with MatGetSize()
5118 
5119     Not Collective
5120 
5121    Input Parameters:
5122 +    A - the matrix
5123 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 
5125    Output Parameter:
5126 .    A_loc - the local sequential matrix generated
5127 
5128     Level: developer
5129 
5130    Notes:
5131      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5132 
5133      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5134      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5135      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5136      modify the values of the returned A_loc.
5137 
5138 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5139 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;  /* cmap: local off-diag column -> global column */
  const PetscScalar *aa,*ba,*aav,*bav;                     /* aa/ba walk the arrays; aav/bav keep the base pointers for restore */
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* Accept any type whose name starts with "mpiaij" (covers derived types such as mpiaijcusparse) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Uniprocessor: the diagonal block IS the whole matrix; reference it (initial) or copy values into the caller's matrix (reuse) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal plus off-diagonal entries */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A whose global column is below the diagonal block
         (keeps the merged row sorted by global column) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local column indices by cstart to make them global */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column past the diagonal block) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: only refresh the values, walking the same diag/off-diag interleaving as above */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A (columns below the diagonal block) */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (remaining columns) */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    /* NOTE(review): cam has been advanced to one past the end here; assumes
       MatSeqAIJRestoreArrayWrite() only invalidates the pointer -- confirm */
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}
5240 
5241 /*@
5242      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5243           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5244 
5245     Not Collective
5246 
5247    Input Parameters:
5248 +    A - the matrix
5249 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5250 
5251    Output Parameters:
5252 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5253 -    A_loc - the local sequential matrix generated
5254 
5255     Level: developer
5256 
5257    Notes:
5258      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5259 
5260 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5261 
5262 @*/
5263 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5264 {
5265   Mat            Ao,Ad;
5266   const PetscInt *cmap;
5267   PetscMPIInt    size;
5268   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5269 
5270   PetscFunctionBegin;
5271   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap));
5272   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
5273   if (size == 1) {
5274     if (scall == MAT_INITIAL_MATRIX) {
5275       PetscCall(PetscObjectReference((PetscObject)Ad));
5276       *A_loc = Ad;
5277     } else if (scall == MAT_REUSE_MATRIX) {
5278       PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN));
5279     }
5280     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob));
5281     PetscFunctionReturn(0);
5282   }
5283   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f));
5284   PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
5285   if (f) {
5286     PetscCall((*f)(A,scall,glob,A_loc));
5287   } else {
5288     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5289     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5290     Mat_SeqAIJ        *c;
5291     PetscInt          *ai = a->i, *aj = a->j;
5292     PetscInt          *bi = b->i, *bj = b->j;
5293     PetscInt          *ci,*cj;
5294     const PetscScalar *aa,*ba;
5295     PetscScalar       *ca;
5296     PetscInt          i,j,am,dn,on;
5297 
5298     PetscCall(MatGetLocalSize(Ad,&am,&dn));
5299     PetscCall(MatGetLocalSize(Ao,NULL,&on));
5300     PetscCall(MatSeqAIJGetArrayRead(Ad,&aa));
5301     PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5302     if (scall == MAT_INITIAL_MATRIX) {
5303       PetscInt k;
5304       PetscCall(PetscMalloc1(1+am,&ci));
5305       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5306       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5307       ci[0] = 0;
5308       for (i=0,k=0; i<am; i++) {
5309         const PetscInt ncols_o = bi[i+1] - bi[i];
5310         const PetscInt ncols_d = ai[i+1] - ai[i];
5311         ci[i+1] = ci[i] + ncols_o + ncols_d;
5312         /* diagonal portion of A */
5313         for (j=0; j<ncols_d; j++,k++) {
5314           cj[k] = *aj++;
5315           ca[k] = *aa++;
5316         }
5317         /* off-diagonal portion of A */
5318         for (j=0; j<ncols_o; j++,k++) {
5319           cj[k] = dn + *bj++;
5320           ca[k] = *ba++;
5321         }
5322       }
5323       /* put together the new matrix */
5324       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5325       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5326       /* Since these are PETSc arrays, change flags to free them as necessary. */
5327       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5328       c->free_a  = PETSC_TRUE;
5329       c->free_ij = PETSC_TRUE;
5330       c->nonew   = 0;
5331       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5332     } else if (scall == MAT_REUSE_MATRIX) {
5333       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5334       for (i=0; i<am; i++) {
5335         const PetscInt ncols_d = ai[i+1] - ai[i];
5336         const PetscInt ncols_o = bi[i+1] - bi[i];
5337         /* diagonal portion of A */
5338         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5339         /* off-diagonal portion of A */
5340         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5341       }
5342       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5343     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5344     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5345     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa));
5346     if (glob) {
5347       PetscInt cst, *gidx;
5348 
5349       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5350       PetscCall(PetscMalloc1(dn+on,&gidx));
5351       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5352       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5353       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5354     }
5355   }
5356   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5357   PetscFunctionReturn(0);
5358 }
5359 
5360 /*@C
5361      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5362 
5363     Not Collective
5364 
5365    Input Parameters:
5366 +    A - the matrix
5367 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5368 -    row, col - index sets of rows and columns to extract (or NULL)
5369 
5370    Output Parameter:
5371 .    A_loc - the local sequential matrix generated
5372 
5373     Level: developer
5374 
5375 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5376 
5377 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* Default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* Default column set: the nonzero columns of the local part of A, in ascending
       global order: off-diag columns below the diagonal block, then the owned
       columns, then the remaining off-diag columns (a->garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    /* the compose above took a reference, so the IS survives with the matrix */
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}
5433 
/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row: [diag count, offdiag count] plus running offsets into pd->j/po->j */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    /* total nonzeros per requested row; ncol tracks the widest row */
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build two SFs whose leaves are positions inside P_oth's single CSR arrays:
   * one pulls from the owners' diag blocks, the other from their off-diag blocks */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (temporarily mutates pd->j; undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* Map po->j to global indices in place (temporary; mapped back below) */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Undo the in-place globalization of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5607 
5608 /*
5609  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5610  * This supports MPIAIJ and MAIJ
5611  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      /* dof > 1 collapses blocks of dof consecutive global columns onto one key */
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count-1;
      }
    }
    /* map: off-diag column of A -> row of the extracted P_oth */
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash map keys come out unordered; sort to get ascending global rows */
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}
5685 
5686 /*@C
5687   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5688 
5689   Collective on Mat
5690 
5691   Input Parameters:
5692 + A - the first matrix in mpiaij format
5693 . B - the second matrix in mpiaij format
5694 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5695 
5696   Output Parameters:
5697 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5698 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5699 - B_seq - the sequential matrix generated
5700 
5701   Level: developer
5702 
5703 @*/
5704 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5705 {
5706   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5707   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5708   IS             isrowb,iscolb;
5709   Mat            *bseq=NULL;
5710 
5711   PetscFunctionBegin;
5712   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5713     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5714   }
5715   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));
5716 
5717   if (scall == MAT_INITIAL_MATRIX) {
5718     start = A->cmap->rstart;
5719     cmap  = a->garray;
5720     nzA   = a->A->cmap->n;
5721     nzB   = a->B->cmap->n;
5722     PetscCall(PetscMalloc1(nzA+nzB, &idx));
5723     ncols = 0;
5724     for (i=0; i<nzB; i++) {  /* row < local row index */
5725       if (cmap[i] < start) idx[ncols++] = cmap[i];
5726       else break;
5727     }
5728     imark = i;
5729     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5730     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5731     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
5732     PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
5733   } else {
5734     PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5735     isrowb  = *rowb; iscolb = *colb;
5736     PetscCall(PetscMalloc1(1,&bseq));
5737     bseq[0] = *B_seq;
5738   }
5739   PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
5740   *B_seq = bseq[0];
5741   PetscCall(PetscFree(bseq));
5742   if (!rowb) {
5743     PetscCall(ISDestroy(&isrowb));
5744   } else {
5745     *rowb = isrowb;
5746   }
5747   if (!colb) {
5748     PetscCall(ISDestroy(&iscolb));
5749   } else {
5750     *colb = iscolb;
5751   }
5752   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
5753   PetscFunctionReturn(0);
5754 }
5755 
5756 /*
5757     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5758     of the OFF-DIAGONAL portion of local A
5759 
5760     Collective on Mat
5761 
5762    Input Parameters:
5763 +    A,B - the matrices in mpiaij format
5764 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5765 
   Output Parameters:
5767 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5768 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5769 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5770 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5771 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5774 
5775     Level: developer
5776 
5777 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs;  /* ranks we receive from / send to */
  const PetscInt         *srow,*rstarts,*sstarts;
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* A's column layout must match B's row layout so "rows of B matching A's off-diagonal columns" is well defined */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* Nothing is off-process on a single rank.
       NOTE(review): the first two assignments only overwrite the local copies of the
       pointer parameters and have no effect on the caller's variables; presumably just
       marking the outputs unused on one rank -- confirm intent. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  /* Reuse the communication pattern of the matrix-vector scatter (see Developer Notes above) */
  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  rwaits = reqs;          /* first nrecvs requests are the receives */
  swaits = reqs + nrecvs; /* remaining nsends requests are the sends */

  /* Without arrays to stash the send layout and buffer, the pattern cannot be reused */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      /* NOTE(review): offset here is rstarts[i]*rbs while the allocation above and the
         scan below use (rstarts[i]-rstarts[0])*rbs; if rstarts[0] != 0 (see ATTENTION
         comment below) this writes outside rvalues -- verify */
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j],len,&len)); /* len += rowlen[j] with overflow check */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Reuse the i/j structure and send buffer saved by a previous MAT_INITIAL_MATRIX call */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  /* NOTE(review): this error text mentions "Matrix P"/object container, which does not
     match this routine's arguments; looks copied from elsewhere -- confirm */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj,rstartsj));
      /* NOTE(review): this frees the bufa_ptr argument itself (a no-op when it is NULL),
         not the send buffer bufa allocated above; it looks like it should be
         PetscFree(bufa), otherwise bufa is leaked on this path -- verify */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* Hand the layout arrays and send buffer to the caller for MAT_REUSE_MATRIX calls */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}
5985 
5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5989 #if defined(PETSC_HAVE_MKL_SPARSE)
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5991 #endif
5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5994 #if defined(PETSC_HAVE_ELEMENTAL)
5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5996 #endif
5997 #if defined(PETSC_HAVE_SCALAPACK)
5998 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5999 #endif
6000 #if defined(PETSC_HAVE_HYPRE)
6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
6002 #endif
6003 #if defined(PETSC_HAVE_CUDA)
6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
6005 #endif
6006 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
6008 #endif
6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
6010 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
6011 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6012 
6013 /*
6014     Computes (B'*A')' since computing B*A directly is untenable
6015 
6016                n                       p                          p
6017         [             ]       [             ]         [                 ]
6018       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6019         [             ]       [             ]         [                 ]
6020 
6021 */
6022 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6023 {
6024   Mat            At,Bt,Ct;
6025 
6026   PetscFunctionBegin;
6027   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
6028   PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
6029   PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
6030   PetscCall(MatDestroy(&At));
6031   PetscCall(MatDestroy(&Bt));
6032   PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
6033   PetscCall(MatDestroy(&Ct));
6034   PetscFunctionReturn(0);
6035 }
6036 
6037 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6038 {
6039   PetscBool      cisdense;
6040 
6041   PetscFunctionBegin;
6042   PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6043   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
6044   PetscCall(MatSetBlockSizesFromMats(C,A,B));
6045   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
6046   if (!cisdense) {
6047     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
6048   }
6049   PetscCall(MatSetUp(C));
6050 
6051   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6052   PetscFunctionReturn(0);
6053 }
6054 
6055 /* ----------------------------------------------------------------*/
6056 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6057 {
6058   Mat_Product *product = C->product;
6059   Mat         A = product->A,B=product->B;
6060 
6061   PetscFunctionBegin;
6062   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6063     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6064 
6065   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6066   C->ops->productsymbolic = MatProductSymbolic_AB;
6067   PetscFunctionReturn(0);
6068 }
6069 
6070 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6071 {
6072   Mat_Product    *product = C->product;
6073 
6074   PetscFunctionBegin;
6075   if (product->type == MATPRODUCT_AB) {
6076     PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6077   }
6078   PetscFunctionReturn(0);
6079 }
6080 
6081 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6082 
6083   Input Parameters:
6084 
6085     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6086     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6087 
6088     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6089 
6090     For Set1, j1[] contains column indices of the nonzeros.
6091     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6093     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6094 
6095     Similar for Set2.
6096 
6097     This routine merges the two sets of nonzeros row by row and removes repeats.
6098 
6099   Output Parameters: (memory is allocated by the caller)
6100 
6101     i[],j[]: the CSR of the merged matrix, which has m rows.
6102     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6103     imap2[]: similar to imap1[], but for Set2.
6104     Note we order nonzeros row-by-row and from left to right.
6105 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt       r,m; /* Row index of mat */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Two-way merge of two sorted (possibly repeated) index lists; the jmap jumps
       advance b1/b2 past all repeats of the current unique nonzero at once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero only present in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Nonzero only present in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer of the merged matrix: t unique nonzeros so far */
  }
  PetscFunctionReturn(0);
}
6159 
6160 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6161 
6162   Input Parameters:
6163     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6164     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6165       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6166 
6167       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6168       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6169 
6170   Output Parameters:
6171     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6172     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6173       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6174       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6175 
6176     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6177       Atot: number of entries belonging to the diagonal block.
6178       Annz: number of unique nonzeros belonging to the diagonal block.
6179       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6180         repeats (i.e., same 'i,j' pair).
6181       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6182         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6183 
6184       Atot: number of entries belonging to the diagonal block
6185       Annz: number of unique nonzeros belonging to the diagonal block.
6186 
6187     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6188 
6189     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6190 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m    = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the upper bound check uses <= mat->cmap->N, but a valid global
         column index should satisfy j[p] < N -- confirm whether <= is intentional */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p]; /* col holds the still-shifted value, matching the yet-unreverted entries compared below */
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot,&Aperm));
  PetscCall(PetscMalloc1(Btot,&Bperm));
  PetscCall(PetscMalloc1(Annz+1,&Ajmap));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Counters are recomputed identically in this second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k,  mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* cumulative repeat count, so jmap differences give repeats */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6290 
6291 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6292 
6293   Input Parameters:
6294     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6295     nnz:  number of unique nonzeros in the merged matrix
6296     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6298 
6299   Output Parameter: (memory is allocated by the caller)
6300     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6301 
6302   Example:
6303     nnz1 = 4
6304     nnz  = 6
6305     imap = [1,3,4,5]
6306     jmap = [0,3,5,6,7]
6307    then,
6308     jmap_new = [0,0,3,3,5,6,7]
6309 */
6310 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
6311 {
6312   PetscCount k,p;
6313 
6314   PetscFunctionBegin;
6315   jmap_new[0] = 0;
6316   p = nnz; /* p loops over jmap_new[] backwards */
6317   for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
6318     for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
6319   }
6320   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6321   PetscFunctionReturn(0);
6322 }
6323 
6324 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6325 {
6326   MPI_Comm                  comm;
6327   PetscMPIInt               rank,size;
6328   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6329   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6330   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6331 
6332   PetscFunctionBegin;
6333   PetscCall(PetscFree(mpiaij->garray));
6334   PetscCall(VecDestroy(&mpiaij->lvec));
6335 #if defined(PETSC_USE_CTABLE)
6336   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6337 #else
6338   PetscCall(PetscFree(mpiaij->colmap));
6339 #endif
6340   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6341   mat->assembled = PETSC_FALSE;
6342   mat->was_assembled = PETSC_FALSE;
6343   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6344 
6345   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
6346   PetscCallMPI(MPI_Comm_size(comm,&size));
6347   PetscCallMPI(MPI_Comm_rank(comm,&rank));
6348   PetscCall(PetscLayoutSetUp(mat->rmap));
6349   PetscCall(PetscLayoutSetUp(mat->cmap));
6350   PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
6351   PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
6352   PetscCall(MatGetLocalSize(mat,&m,&n));
6353   PetscCall(MatGetSize(mat,&M,&N));
6354 
6355   /* ---------------------------------------------------------------------------*/
6356   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6357   /* entries come first, then local rows, then remote rows.                     */
6358   /* ---------------------------------------------------------------------------*/
6359   PetscCount n1 = coo_n,*perm1;
6360   PetscInt   *i1 = coo_i,*j1 = coo_j;
6361 
6362   PetscCall(PetscMalloc1(n1,&perm1));
6363   for (k=0; k<n1; k++) perm1[k] = k;
6364 
6365   /* Manipulate indices so that entries with negative row or col indices will have smallest
6366      row indices, local entries will have greater but negative row indices, and remote entries
6367      will have positive row indices.
6368   */
6369   for (k=0; k<n1; k++) {
6370     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6371     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6372     else {
6373       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6374       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6375     }
6376   }
6377 
6378   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6379   PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6380   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6381   PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6382   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6383 
6384   /* ---------------------------------------------------------------------------*/
6385   /*           Split local rows into diag/offdiag portions                      */
6386   /* ---------------------------------------------------------------------------*/
6387   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6388   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6389   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6390 
6391   PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6392   PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6393   PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6394 
6395   /* ---------------------------------------------------------------------------*/
6396   /*           Send remote rows to their owner                                  */
6397   /* ---------------------------------------------------------------------------*/
6398   /* Find which rows should be sent to which remote ranks*/
6399   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6400   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6401   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6402   const PetscInt *ranges;
6403   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6404 
6405   PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6406   PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6407   for (k=rem; k<n1;) {
6408     PetscMPIInt  owner;
6409     PetscInt     firstRow,lastRow;
6410 
6411     /* Locate a row range */
6412     firstRow = i1[k]; /* first row of this owner */
6413     PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6414     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6415 
6416     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6417     PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6418 
6419     /* All entries in [k,p) belong to this remote owner */
6420     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6421       PetscMPIInt *sendto2;
6422       PetscInt    *nentries2;
6423       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6424 
6425       PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6426       PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6427       PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1));
6428       PetscCall(PetscFree2(sendto,nentries2));
6429       sendto      = sendto2;
6430       nentries    = nentries2;
6431       maxNsend    = maxNsend2;
6432     }
6433     sendto[nsend]   = owner;
6434     nentries[nsend] = p - k;
6435     PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6436     nsend++;
6437     k = p;
6438   }
6439 
6440   /* Build 1st SF to know offsets on remote to send data */
6441   PetscSF     sf1;
6442   PetscInt    nroots = 1,nroots2 = 0;
6443   PetscInt    nleaves = nsend,nleaves2 = 0;
6444   PetscInt    *offsets;
6445   PetscSFNode *iremote;
6446 
6447   PetscCall(PetscSFCreate(comm,&sf1));
6448   PetscCall(PetscMalloc1(nsend,&iremote));
6449   PetscCall(PetscMalloc1(nsend,&offsets));
6450   for (k=0; k<nsend; k++) {
6451     iremote[k].rank  = sendto[k];
6452     iremote[k].index = 0;
6453     nleaves2        += nentries[k];
6454     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6455   }
6456   PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6457   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6458   PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6459   PetscCall(PetscSFDestroy(&sf1));
6460   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6461 
6462   /* Build 2nd SF to send remote COOs to their owner */
6463   PetscSF sf2;
6464   nroots  = nroots2;
6465   nleaves = nleaves2;
6466   PetscCall(PetscSFCreate(comm,&sf2));
6467   PetscCall(PetscSFSetFromOptions(sf2));
6468   PetscCall(PetscMalloc1(nleaves,&iremote));
6469   p       = 0;
6470   for (k=0; k<nsend; k++) {
6471     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6472     for (q=0; q<nentries[k]; q++,p++) {
6473       iremote[p].rank  = sendto[k];
6474       iremote[p].index = offsets[k] + q;
6475     }
6476   }
6477   PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6478 
6479   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6480   PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));
6481 
6482   /* Send the remote COOs to their owner */
6483   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6484   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6485   PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
6486   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
6487   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
6488   PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
6489   PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));
6490 
6491   PetscCall(PetscFree(offsets));
6492   PetscCall(PetscFree2(sendto,nentries));
6493 
6494   /* ---------------------------------------------------------------*/
6495   /* Sort received COOs by row along with the permutation array     */
6496   /* ---------------------------------------------------------------*/
6497   for (k=0; k<n2; k++) perm2[k] = k;
6498   PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));
6499 
6500   /* ---------------------------------------------------------------*/
6501   /* Split received COOs into diag/offdiag portions                 */
6502   /* ---------------------------------------------------------------*/
6503   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6504   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6505   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6506 
6507   PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
6508   PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));
6509 
6510   /* --------------------------------------------------------------------------*/
6511   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6512   /* --------------------------------------------------------------------------*/
6513   PetscInt   *Ai,*Bi;
6514   PetscInt   *Aj,*Bj;
6515 
6516   PetscCall(PetscMalloc1(m+1,&Ai));
6517   PetscCall(PetscMalloc1(m+1,&Bi));
6518   PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6519   PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));
6520 
6521   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6522   PetscCall(PetscMalloc1(Annz1,&Aimap1));
6523   PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
6524   PetscCall(PetscMalloc1(Annz2,&Aimap2));
6525   PetscCall(PetscMalloc1(Bnnz2,&Bimap2));
6526 
6527   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
6528   PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));
6529 
6530   /* --------------------------------------------------------------------------*/
6531   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6532   /* expect nonzeros in A/B most likely have local contributing entries        */
6533   /* --------------------------------------------------------------------------*/
6534   PetscInt Annz = Ai[m];
6535   PetscInt Bnnz = Bi[m];
6536   PetscCount *Ajmap1_new,*Bjmap1_new;
6537 
6538   PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
6539   PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
6540 
6541   PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
6542   PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));
6543 
6544   PetscCall(PetscFree(Aimap1));
6545   PetscCall(PetscFree(Ajmap1));
6546   PetscCall(PetscFree(Bimap1));
6547   PetscCall(PetscFree(Bjmap1));
6548   PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
6549   PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
6550   PetscCall(PetscFree(perm1));
6551   PetscCall(PetscFree3(i2,j2,perm2));
6552 
6553   Ajmap1 = Ajmap1_new;
6554   Bjmap1 = Bjmap1_new;
6555 
6556   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6557   if (Annz < Annz1 + Annz2) {
6558     PetscInt *Aj_new;
6559     PetscCall(PetscMalloc1(Annz,&Aj_new));
6560     PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
6561     PetscCall(PetscFree(Aj));
6562     Aj   = Aj_new;
6563   }
6564 
6565   if (Bnnz < Bnnz1 + Bnnz2) {
6566     PetscInt *Bj_new;
6567     PetscCall(PetscMalloc1(Bnnz,&Bj_new));
6568     PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
6569     PetscCall(PetscFree(Bj));
6570     Bj   = Bj_new;
6571   }
6572 
6573   /* --------------------------------------------------------------------------------*/
6574   /* Create new submatrices for on-process and off-process coupling                  */
6575   /* --------------------------------------------------------------------------------*/
6576   PetscScalar   *Aa,*Ba;
6577   MatType       rtype;
6578   Mat_SeqAIJ    *a,*b;
6579   PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */
6580   PetscCall(PetscCalloc1(Bnnz,&Ba));
6581   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6582   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6583   PetscCall(MatDestroy(&mpiaij->A));
6584   PetscCall(MatDestroy(&mpiaij->B));
6585   PetscCall(MatGetRootType_Private(mat,&rtype));
6586   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
6587   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
6588   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6589 
6590   a = (Mat_SeqAIJ*)mpiaij->A->data;
6591   b = (Mat_SeqAIJ*)mpiaij->B->data;
6592   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6593   a->free_a       = b->free_a       = PETSC_TRUE;
6594   a->free_ij      = b->free_ij      = PETSC_TRUE;
6595 
6596   /* conversion must happen AFTER multiply setup */
6597   PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
6598   PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
6599   PetscCall(VecDestroy(&mpiaij->lvec));
6600   PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
6601   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));
6602 
6603   mpiaij->coo_n   = coo_n;
6604   mpiaij->coo_sf  = sf2;
6605   mpiaij->sendlen = nleaves;
6606   mpiaij->recvlen = nroots;
6607 
6608   mpiaij->Annz    = Annz;
6609   mpiaij->Bnnz    = Bnnz;
6610 
6611   mpiaij->Annz2   = Annz2;
6612   mpiaij->Bnnz2   = Bnnz2;
6613 
6614   mpiaij->Atot1   = Atot1;
6615   mpiaij->Atot2   = Atot2;
6616   mpiaij->Btot1   = Btot1;
6617   mpiaij->Btot2   = Btot2;
6618 
6619   mpiaij->Ajmap1  = Ajmap1;
6620   mpiaij->Aperm1  = Aperm1;
6621 
6622   mpiaij->Bjmap1  = Bjmap1;
6623   mpiaij->Bperm1  = Bperm1;
6624 
6625   mpiaij->Aimap2  = Aimap2;
6626   mpiaij->Ajmap2  = Ajmap2;
6627   mpiaij->Aperm2  = Aperm2;
6628 
6629   mpiaij->Bimap2  = Bimap2;
6630   mpiaij->Bjmap2  = Bjmap2;
6631   mpiaij->Bperm2  = Bperm2;
6632 
6633   mpiaij->Cperm1  = Cperm1;
6634 
6635   /* Allocate in preallocation. If not used, it has zero cost on host */
6636   PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6637   PetscFunctionReturn(0);
6638 }
6639 
/* Set/add the COO values v[] into the MPIAIJ matrix using the assembly plan built by
   MatSetPreallocationCOO_MPIAIJ(): v[] is ordered as the user's original COO index arrays;
   entries destined for other ranks are shipped through the SF built at preallocation time,
   while locally-owned entries are summed directly into the diagonal (A) and off-diagonal (B)
   sequential blocks.

   Input Parameters:
+  mat   - the MPIAIJ matrix, previously preallocated with MatSetPreallocationCOO
.  v     - one scalar per COO entry given at preallocation
-  imode - INSERT_VALUES (overwrite) or ADD_VALUES (accumulate)

   Notes: the *jmap arrays are CSR-like maps from a nonzero of A/B to the range of contributing
   COO entries; *perm arrays permute from that range back into v[] (local) or recvbuf[] (remote).
*/
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat                  A = mpiaij->A,B = mpiaij->B;
  PetscCount           Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar          *Aa,*Ba;
  PetscScalar          *sendbuf = mpiaij->sendbuf;
  PetscScalar          *recvbuf = mpiaij->recvbuf;
  const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
  const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
  const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount     *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; /* on INSERT the old value is discarded */
  }
  for (PetscCount i=0; i<Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B; every touched nonzero was already initialized
     by the local loops above, so remote contributions always accumulate with += */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}
6686 
6687 /* ----------------------------------------------------------------*/
6688 
6689 /*MC
6690    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6691 
6692    Options Database Keys:
6693 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6694 
6695    Level: beginner
6696 
6697    Notes:
6698     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6699     in this case the values associated with the rows and columns one passes in are set to zero
6700     in the matrix
6701 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6704 
6705 .seealso: `MatCreateAIJ()`
6706 M*/
6707 
/* Constructor for the MATMPIAIJ matrix type: allocates the Mat_MPIAIJ implementation data,
   installs the operations table, and registers (composes) the type-specific routines that the
   rest of PETSc queries by string name (preallocation, conversions, COO assembly, products).
   The matrix is not preallocated here; a preallocation routine such as
   MatMPIAIJSetPreallocation() is expected to be called before use. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations; other code dispatches to these via
     PetscObjectQueryFunction() on the composed name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other (sub)types; device/external-library ones are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}
6787 
6788 /*@C
6789      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6790          and "off-diagonal" part of the matrix in CSR format.
6791 
6792    Collective
6793 
6794    Input Parameters:
6795 +  comm - MPI communicator
6796 .  m - number of local rows (Cannot be PETSC_DECIDE)
6797 .  n - This value should be the same as the local size used in creating the
6798        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6799        calculated if N is given) For square matrices n is almost always m.
6800 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6801 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6802 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6803 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6804 .   a - matrix values
6805 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6806 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6807 -   oa - matrix values
6808 
6809    Output Parameter:
6810 .   mat - the matrix
6811 
6812    Level: advanced
6813 
6814    Notes:
6815        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6816        must free the arrays once the matrix has been destroyed and not before.
6817 
6818        The i and j indices are 0 based
6819 
6820        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6821 
6822        This sets local rows and cannot be used to set off-processor values.
6823 
6824        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6825        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6826        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6827        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6828        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6829        communication if it is known that only local entries will be set.
6830 
6831 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6832           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6833 @*/
6834 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6835 {
6836   Mat_MPIAIJ     *maij;
6837 
6838   PetscFunctionBegin;
6839   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6840   PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6841   PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6842   PetscCall(MatCreate(comm,mat));
6843   PetscCall(MatSetSizes(*mat,m,n,M,N));
6844   PetscCall(MatSetType(*mat,MATMPIAIJ));
6845   maij = (Mat_MPIAIJ*) (*mat)->data;
6846 
6847   (*mat)->preallocated = PETSC_TRUE;
6848 
6849   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6850   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6851 
6852   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
6853   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));
6854 
6855   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
6856   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
6857   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
6858   PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
6859   PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
6860   PetscFunctionReturn(0);
6861 }
6862 
/* Context attached to C->product->data for MPIAIJ matrix products (AB, AtB, PtAP) computed
   via backend sequential products; built by MatProductSymbolic_MPIAIJBACKEND(), consumed by
   MatProductNumeric_MPIAIJBACKEND(), freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the SF-managed coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;    /* for AB: merge product->B's local matrices before multiplying */
  PetscBool P_oth_bind; /* bind P_oth to CPU */
} MatMatMPIAIJBACKEND;
6893 
6894 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6895 {
6896   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6897   PetscInt            i;
6898 
6899   PetscFunctionBegin;
6900   PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
6901   PetscCall(PetscFree(mmdata->bufa));
6902   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
6903   PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
6904   PetscCall(MatDestroy(&mmdata->P_oth));
6905   PetscCall(MatDestroy(&mmdata->Bloc));
6906   PetscCall(PetscSFDestroy(&mmdata->sf));
6907   for (i = 0; i < mmdata->cp; i++) {
6908     PetscCall(MatDestroy(&mmdata->mp[i]));
6909   }
6910   PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
6911   PetscCall(PetscFree(mmdata->own[0]));
6912   PetscCall(PetscFree(mmdata->own));
6913   PetscCall(PetscFree(mmdata->off[0]));
6914   PetscCall(PetscFree(mmdata->off));
6915   PetscCall(PetscFree(mmdata));
6916   PetscFunctionReturn(0);
6917 }
6918 
6919 /* Copy selected n entries with indices in idx[] of A to v[].
6920    If idx is NULL, copy the whole data array of A to v[]
6921  */
6922 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6923 {
6924   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6925 
6926   PetscFunctionBegin;
6927   PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
6928   if (f) {
6929     PetscCall((*f)(A,n,idx,v));
6930   } else {
6931     const PetscScalar *vv;
6932 
6933     PetscCall(MatSeqAIJGetArrayRead(A,&vv));
6934     if (n && idx) {
6935       PetscScalar    *w = v;
6936       const PetscInt *oi = idx;
6937       PetscInt       j;
6938 
6939       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6940     } else {
6941       PetscCall(PetscArraycpy(v,vv,n));
6942     }
6943     PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
6944   }
6945   PetscFunctionReturn(0);
6946 }
6947 
/* Numeric phase of the backend MPIAIJ matrix product: refresh the temporary operands,
   recompute each intermediate sequential product, then scatter their values into C through
   the COO assembly laid out in the symbolic phase (own[]/off[] index tables, coo_v/coo_w
   buffers, and the SF for off-process insertion). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* reusesym is only honored once (first numeric call right after symbolic) */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* harvest values: off-process entries go to coo_w, on-process entries to coo_v; a product
     with no off-process entries contributes its whole value array to coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporaries feed later products, not C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values are appended after the on-process block of coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}
6996 
6997 /* Support for Pt * A, A * P, or Pt * A * P */
6998 #define MAX_NUMBER_INTERMEDIATE 4
6999 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7000 {
7001   Mat_Product            *product = C->product;
7002   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7003   Mat_MPIAIJ             *a,*p;
7004   MatMatMPIAIJBACKEND    *mmdata;
7005   ISLocalToGlobalMapping P_oth_l2g = NULL;
7006   IS                     glob = NULL;
7007   const char             *prefix;
7008   char                   pprefix[256];
7009   const PetscInt         *globidx,*P_oth_idx;
7010   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
7011   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
7012   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7013                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
7014                                                                                         /* a base offset; type-2: sparse with a local to global map table */
7015   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
7016 
7017   MatProductType         ptype;
7018   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
7019   PetscMPIInt            size;
7020 
7021   PetscFunctionBegin;
7022   MatCheckProduct(C,1);
7023   PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
7024   ptype = product->type;
7025   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7026     ptype = MATPRODUCT_AB;
7027     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7028   }
7029   switch (ptype) {
7030   case MATPRODUCT_AB:
7031     A = product->A;
7032     P = product->B;
7033     m = A->rmap->n;
7034     n = P->cmap->n;
7035     M = A->rmap->N;
7036     N = P->cmap->N;
7037     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7038     break;
7039   case MATPRODUCT_AtB:
7040     P = product->A;
7041     A = product->B;
7042     m = P->cmap->n;
7043     n = A->cmap->n;
7044     M = P->cmap->N;
7045     N = A->cmap->N;
7046     hasoffproc = PETSC_TRUE;
7047     break;
7048   case MATPRODUCT_PtAP:
7049     A = product->A;
7050     P = product->B;
7051     m = P->cmap->n;
7052     n = P->cmap->n;
7053     M = P->cmap->N;
7054     N = P->cmap->N;
7055     hasoffproc = PETSC_TRUE;
7056     break;
7057   default:
7058     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7059   }
7060   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
7061   if (size == 1) hasoffproc = PETSC_FALSE;
7062 
7063   /* defaults */
7064   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
7065     mp[i]    = NULL;
7066     mptmp[i] = PETSC_FALSE;
7067     rmapt[i] = -1;
7068     cmapt[i] = -1;
7069     rmapa[i] = NULL;
7070     cmapa[i] = NULL;
7071   }
7072 
7073   /* customization */
7074   PetscCall(PetscNew(&mmdata));
7075   mmdata->reusesym = product->api_user;
7076   if (ptype == MATPRODUCT_AB) {
7077     if (product->api_user) {
7078       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
7079       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7080       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7081       PetscOptionsEnd();
7082     } else {
7083       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
7084       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
7085       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7086       PetscOptionsEnd();
7087     }
7088   } else if (ptype == MATPRODUCT_PtAP) {
7089     if (product->api_user) {
7090       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
7091       PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7092       PetscOptionsEnd();
7093     } else {
7094       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
7095       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
7096       PetscOptionsEnd();
7097     }
7098   }
7099   a = (Mat_MPIAIJ*)A->data;
7100   p = (Mat_MPIAIJ*)P->data;
7101   PetscCall(MatSetSizes(C,m,n,M,N));
7102   PetscCall(PetscLayoutSetUp(C->rmap));
7103   PetscCall(PetscLayoutSetUp(C->cmap));
7104   PetscCall(MatSetType(C,((PetscObject)A)->type_name));
7105   PetscCall(MatGetOptionsPrefix(C,&prefix));
7106 
7107   cp   = 0;
7108   switch (ptype) {
7109   case MATPRODUCT_AB: /* A * P */
7110     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7111 
7112     /* A_diag * P_local (merged or not) */
7113     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7114       /* P is product->B */
7115       PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7116       PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7117       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7118       PetscCall(MatProductSetFill(mp[cp],product->fill));
7119       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7120       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7121       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7122       mp[cp]->product->api_user = product->api_user;
7123       PetscCall(MatProductSetFromOptions(mp[cp]));
7124       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7125       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7126       PetscCall(ISGetIndices(glob,&globidx));
7127       rmapt[cp] = 1;
7128       cmapt[cp] = 2;
7129       cmapa[cp] = globidx;
7130       mptmp[cp] = PETSC_FALSE;
7131       cp++;
7132     } else { /* A_diag * P_diag and A_diag * P_off */
7133       PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
7134       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7135       PetscCall(MatProductSetFill(mp[cp],product->fill));
7136       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7137       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7138       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7139       mp[cp]->product->api_user = product->api_user;
7140       PetscCall(MatProductSetFromOptions(mp[cp]));
7141       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7142       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7143       rmapt[cp] = 1;
7144       cmapt[cp] = 1;
7145       mptmp[cp] = PETSC_FALSE;
7146       cp++;
7147       PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
7148       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7149       PetscCall(MatProductSetFill(mp[cp],product->fill));
7150       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7151       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7152       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7153       mp[cp]->product->api_user = product->api_user;
7154       PetscCall(MatProductSetFromOptions(mp[cp]));
7155       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7156       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7157       rmapt[cp] = 1;
7158       cmapt[cp] = 2;
7159       cmapa[cp] = p->garray;
7160       mptmp[cp] = PETSC_FALSE;
7161       cp++;
7162     }
7163 
7164     /* A_off * P_other */
7165     if (mmdata->P_oth) {
7166       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
7167       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7168       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7169       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7170       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7171       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7172       PetscCall(MatProductSetFill(mp[cp],product->fill));
7173       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7174       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7175       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7176       mp[cp]->product->api_user = product->api_user;
7177       PetscCall(MatProductSetFromOptions(mp[cp]));
7178       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7179       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7180       rmapt[cp] = 1;
7181       cmapt[cp] = 2;
7182       cmapa[cp] = P_oth_idx;
7183       mptmp[cp] = PETSC_FALSE;
7184       cp++;
7185     }
7186     break;
7187 
7188   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7189     /* A is product->B */
7190     PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7191     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7192       PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
7193       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7194       PetscCall(MatProductSetFill(mp[cp],product->fill));
7195       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7196       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7197       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7198       mp[cp]->product->api_user = product->api_user;
7199       PetscCall(MatProductSetFromOptions(mp[cp]));
7200       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7201       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7202       PetscCall(ISGetIndices(glob,&globidx));
7203       rmapt[cp] = 2;
7204       rmapa[cp] = globidx;
7205       cmapt[cp] = 2;
7206       cmapa[cp] = globidx;
7207       mptmp[cp] = PETSC_FALSE;
7208       cp++;
7209     } else {
7210       PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
7211       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7212       PetscCall(MatProductSetFill(mp[cp],product->fill));
7213       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7214       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7215       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7216       mp[cp]->product->api_user = product->api_user;
7217       PetscCall(MatProductSetFromOptions(mp[cp]));
7218       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7219       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7220       PetscCall(ISGetIndices(glob,&globidx));
7221       rmapt[cp] = 1;
7222       cmapt[cp] = 2;
7223       cmapa[cp] = globidx;
7224       mptmp[cp] = PETSC_FALSE;
7225       cp++;
7226       PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
7227       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7228       PetscCall(MatProductSetFill(mp[cp],product->fill));
7229       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7230       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7231       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7232       mp[cp]->product->api_user = product->api_user;
7233       PetscCall(MatProductSetFromOptions(mp[cp]));
7234       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7235       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7236       rmapt[cp] = 2;
7237       rmapa[cp] = p->garray;
7238       cmapt[cp] = 2;
7239       cmapa[cp] = globidx;
7240       mptmp[cp] = PETSC_FALSE;
7241       cp++;
7242     }
7243     break;
7244   case MATPRODUCT_PtAP:
7245     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
7246     /* P is product->B */
7247     PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
7248     PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
7249     PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
7250     PetscCall(MatProductSetFill(mp[cp],product->fill));
7251     PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7252     PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7253     PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7254     mp[cp]->product->api_user = product->api_user;
7255     PetscCall(MatProductSetFromOptions(mp[cp]));
7256     PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7257     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7258     PetscCall(ISGetIndices(glob,&globidx));
7259     rmapt[cp] = 2;
7260     rmapa[cp] = globidx;
7261     cmapt[cp] = 2;
7262     cmapa[cp] = globidx;
7263     mptmp[cp] = PETSC_FALSE;
7264     cp++;
7265     if (mmdata->P_oth) {
7266       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
7267       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
7268       PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
7269       PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
7270       PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
7271       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
7272       PetscCall(MatProductSetFill(mp[cp],product->fill));
7273       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7274       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7275       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7276       mp[cp]->product->api_user = product->api_user;
7277       PetscCall(MatProductSetFromOptions(mp[cp]));
7278       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7279       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7280       mptmp[cp] = PETSC_TRUE;
7281       cp++;
7282       PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
7284       PetscCall(MatProductSetFill(mp[cp],product->fill));
7285       PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7291       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7292       rmapt[cp] = 2;
7293       rmapa[cp] = globidx;
7294       cmapt[cp] = 2;
7295       cmapa[cp] = P_oth_idx;
7296       mptmp[cp] = PETSC_FALSE;
7297       cp++;
7298     }
7299     break;
7300   default:
7301     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7302   }
7303   /* sanity check */
7304   if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7305 
7306   PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
7307   for (i = 0; i < cp; i++) {
7308     mmdata->mp[i]    = mp[i];
7309     mmdata->mptmp[i] = mptmp[i];
7310   }
7311   mmdata->cp = cp;
7312   C->product->data       = mmdata;
7313   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7314   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7315 
7316   /* memory type */
7317   mmdata->mtype = PETSC_MEMTYPE_HOST;
7318   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
7319   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
7320   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7321   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7322 
7323   /* prepare coo coordinates for values insertion */
7324 
7325   /* count total nonzeros of those intermediate seqaij Mats
7326     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7327     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7328     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7329   */
7330   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7331     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7332     if (mptmp[cp]) continue;
7333     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
7334       const PetscInt *rmap = rmapa[cp];
7335       const PetscInt mr = mp[cp]->rmap->n;
7336       const PetscInt rs = C->rmap->rstart;
7337       const PetscInt re = C->rmap->rend;
7338       const PetscInt *ii  = mm->i;
7339       for (i = 0; i < mr; i++) {
7340         const PetscInt gr = rmap[i];
7341         const PetscInt nz = ii[i+1] - ii[i];
7342         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7343         else ncoo_oown += nz; /* this row is local */
7344       }
7345     } else ncoo_d += mm->nz;
7346   }
7347 
7348   /*
7349     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7350 
7351     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
7352 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7354 
7355     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7356     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7357     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7358 
7359     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7361   */
7362   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7363   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7364 
7365   /* gather (i,j) of nonzeros inserted by remote procs */
7366   if (hasoffproc) {
7367     PetscSF  msf;
7368     PetscInt ncoo2,*coo_i2,*coo_j2;
7369 
7370     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7371     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7372     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7373 
7374     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7375       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7376       PetscInt   *idxoff = mmdata->off[cp];
7377       PetscInt   *idxown = mmdata->own[cp];
7378       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7379         const PetscInt *rmap = rmapa[cp];
7380         const PetscInt *cmap = cmapa[cp];
7381         const PetscInt *ii  = mm->i;
7382         PetscInt       *coi = coo_i + ncoo_o;
7383         PetscInt       *coj = coo_j + ncoo_o;
7384         const PetscInt mr = mp[cp]->rmap->n;
7385         const PetscInt rs = C->rmap->rstart;
7386         const PetscInt re = C->rmap->rend;
7387         const PetscInt cs = C->cmap->rstart;
7388         for (i = 0; i < mr; i++) {
7389           const PetscInt *jj = mm->j + ii[i];
7390           const PetscInt gr  = rmap[i];
7391           const PetscInt nz  = ii[i+1] - ii[i];
7392           if (gr < rs || gr >= re) { /* this is an offproc row */
7393             for (j = ii[i]; j < ii[i+1]; j++) {
7394               *coi++ = gr;
7395               *idxoff++ = j;
7396             }
7397             if (!cmapt[cp]) { /* already global */
7398               for (j = 0; j < nz; j++) *coj++ = jj[j];
7399             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7400               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7401             } else { /* offdiag */
7402               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7403             }
7404             ncoo_o += nz;
7405           } else { /* this is a local row */
7406             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7407           }
7408         }
7409       }
7410       mmdata->off[cp + 1] = idxoff;
7411       mmdata->own[cp + 1] = idxown;
7412     }
7413 
7414     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7415     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7416     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7417     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7418     ncoo = ncoo_d + ncoo_oown + ncoo2;
7419     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7420     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7421     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7422     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7423     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7424     PetscCall(PetscFree2(coo_i,coo_j));
7425     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7426     PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
7427     coo_i = coo_i2;
7428     coo_j = coo_j2;
7429   } else { /* no offproc values insertion */
7430     ncoo = ncoo_d;
7431     PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));
7432 
7433     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7434     PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
7435     PetscCall(PetscSFSetUp(mmdata->sf));
7436   }
7437   mmdata->hasoffproc = hasoffproc;
7438 
7439   /* gather (i,j) of nonzeros inserted locally */
7440   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7441     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7442     PetscInt       *coi = coo_i + ncoo_d;
7443     PetscInt       *coj = coo_j + ncoo_d;
7444     const PetscInt *jj  = mm->j;
7445     const PetscInt *ii  = mm->i;
7446     const PetscInt *cmap = cmapa[cp];
7447     const PetscInt *rmap = rmapa[cp];
7448     const PetscInt mr = mp[cp]->rmap->n;
7449     const PetscInt rs = C->rmap->rstart;
7450     const PetscInt re = C->rmap->rend;
7451     const PetscInt cs = C->cmap->rstart;
7452 
7453     if (mptmp[cp]) continue;
7454     if (rmapt[cp] == 1) { /* consecutive rows */
7455       /* fill coo_i */
7456       for (i = 0; i < mr; i++) {
7457         const PetscInt gr = i + rs;
7458         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7459       }
7460       /* fill coo_j */
7461       if (!cmapt[cp]) { /* type-0, already global */
7462         PetscCall(PetscArraycpy(coj,jj,mm->nz));
7463       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7464         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7465       } else { /* type-2, local to global for sparse columns */
7466         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7467       }
7468       ncoo_d += mm->nz;
7469     } else if (rmapt[cp] == 2) { /* sparse rows */
7470       for (i = 0; i < mr; i++) {
7471         const PetscInt *jj = mm->j + ii[i];
7472         const PetscInt gr  = rmap[i];
7473         const PetscInt nz  = ii[i+1] - ii[i];
7474         if (gr >= rs && gr < re) { /* local rows */
7475           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7476           if (!cmapt[cp]) { /* type-0, already global */
7477             for (j = 0; j < nz; j++) *coj++ = jj[j];
7478           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7479             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7480           } else { /* type-2, local to global for sparse columns */
7481             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7482           }
7483           ncoo_d += nz;
7484         }
7485       }
7486     }
7487   }
7488   if (glob) {
7489     PetscCall(ISRestoreIndices(glob,&globidx));
7490   }
7491   PetscCall(ISDestroy(&glob));
7492   if (P_oth_l2g) {
7493     PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
7494   }
7495   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7496   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7497   PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));
7498 
7499   /* preallocate with COO data */
7500   PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
7501   PetscCall(PetscFree2(coo_i,coo_j));
7502   PetscFunctionReturn(0);
7503 }
7504 
7505 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7506 {
7507   Mat_Product *product = mat->product;
7508 #if defined(PETSC_HAVE_DEVICE)
7509   PetscBool    match   = PETSC_FALSE;
7510   PetscBool    usecpu  = PETSC_FALSE;
7511 #else
7512   PetscBool    match   = PETSC_TRUE;
7513 #endif
7514 
7515   PetscFunctionBegin;
7516   MatCheckProduct(mat,1);
7517 #if defined(PETSC_HAVE_DEVICE)
7518   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7519     PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
7520   }
7521   if (match) { /* we can always fallback to the CPU if requested */
7522     switch (product->type) {
7523     case MATPRODUCT_AB:
7524       if (product->api_user) {
7525         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7526         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7527         PetscOptionsEnd();
7528       } else {
7529         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7530         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
7531         PetscOptionsEnd();
7532       }
7533       break;
7534     case MATPRODUCT_AtB:
7535       if (product->api_user) {
7536         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7537         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7538         PetscOptionsEnd();
7539       } else {
7540         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7541         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
7542         PetscOptionsEnd();
7543       }
7544       break;
7545     case MATPRODUCT_PtAP:
7546       if (product->api_user) {
7547         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7548         PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7549         PetscOptionsEnd();
7550       } else {
7551         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7552         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
7553         PetscOptionsEnd();
7554       }
7555       break;
7556     default:
7557       break;
7558     }
7559     match = (PetscBool)!usecpu;
7560   }
7561 #endif
7562   if (match) {
7563     switch (product->type) {
7564     case MATPRODUCT_AB:
7565     case MATPRODUCT_AtB:
7566     case MATPRODUCT_PtAP:
7567       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7568       break;
7569     default:
7570       break;
7571     }
7572   }
7573   /* fallback to MPIAIJ ops */
7574   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7575   PetscFunctionReturn(0);
7576 }
7577 
7578 /*
7579    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7580 
7581    n - the number of block indices in cc[]
7582    cc - the block indices (must be large enough to contain the indices)
7583 */
7584 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
7585 {
7586   PetscInt       cnt = -1,nidx,j;
7587   const PetscInt *idx;
7588 
7589   PetscFunctionBegin;
7590   PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
7591   if (nidx) {
7592     cnt = 0;
7593     cc[cnt] = idx[0]/bs;
7594     for (j=1; j<nidx; j++) {
7595       if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
7596     }
7597   }
7598   PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
7599   *n = cnt+1;
7600   PetscFunctionReturn(0);
7601 }
7602 
7603 /*
7604     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7605 
7606     ncollapsed - the number of block indices
7607     collapsed - the block indices (must be large enough to contain the indices)
7608 */
7609 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7610 {
7611   PetscInt       i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7612 
7613   PetscFunctionBegin;
7614   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7615   for (i=start+1; i<start+bs; i++) {
7616     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7617     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7618     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7619   }
7620   *ncollapsed = nprev;
7621   if (collapsed) *collapsed  = cprev;
7622   PetscFunctionReturn(0);
7623 }
7624 
7625 /* -------------------------------------------------------------------------- */
7626 /*
7627  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7628 
 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
- scale - scale with diagonal
7633 
7634  Output Parameter:
7635  . a_Gmat - output scalar graph >= 0
7636 
7637  */
7638 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7639 {
7640   PetscInt       Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7641   MPI_Comm       comm;
7642   Mat            Gmat;
7643   PetscBool      ismpiaij,isseqaij;
7644   Mat            a, b, c;
7645   MatType        jtype;
7646 
7647   PetscFunctionBegin;
7648   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7649   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7650   PetscCall(MatGetSize(Amat, &MM, &NN));
7651   PetscCall(MatGetBlockSize(Amat, &bs));
7652   nloc = (Iend-Istart)/bs;
7653 
7654   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7655   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7656   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7657 
7658   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7659   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7660      implementation */
7661   if (bs > 1) {
7662     PetscCall(MatGetType(Amat,&jtype));
7663     PetscCall(MatCreate(comm, &Gmat));
7664     PetscCall(MatSetType(Gmat, jtype));
7665     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7666     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7667     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7668       PetscInt  *d_nnz, *o_nnz;
7669       MatScalar *aa,val,AA[4096];
7670       PetscInt  *aj,*ai,AJ[4096],nc;
7671       if (isseqaij) { a = Amat; b = NULL; }
7672       else {
7673         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7674         a = d->A; b = d->B;
7675       }
7676       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7677       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7678       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7679         PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
7680         const PetscInt *cols;
7681         for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
7682           PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
7683           nnz[brow/bs] = jj/bs;
7684           if (jj%bs) ok = 0;
7685           if (cols) j0 = cols[0];
7686           else j0 = -1;
7687           PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
7688           if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
7689           for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
7690             PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
7691             if (jj%bs) ok = 0;
7692             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7693             if (nnz[brow/bs] != jj/bs) ok = 0;
7694             PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
7695           }
7696           if (!ok) {
7697             PetscCall(PetscFree2(d_nnz,o_nnz));
7698             goto old_bs;
7699           }
7700         }
7701         PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
7702       }
7703       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7704       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7705       PetscCall(PetscFree2(d_nnz,o_nnz));
7706       // diag
7707       for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
7708         Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
7709         ai = aseq->i;
7710         n  = ai[brow+1] - ai[brow];
7711         aj = aseq->j + ai[brow];
7712         for (int k=0; k<n; k += bs) { // block columns
7713           AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
7714           val = 0;
7715           for (int ii=0; ii<bs; ii++) { // rows in block
7716             aa = aseq->a + ai[brow+ii] + k;
7717             for (int jj=0; jj<bs; jj++) { // columns in block
7718               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7719             }
7720           }
7721           AA[k/bs] = val;
7722         }
7723         grow = Istart/bs + brow/bs;
7724         PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
7725       }
7726       // off-diag
7727       if (ismpiaij) {
7728         Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
7729         const PetscScalar *vals;
7730         const PetscInt    *cols, *garray = aij->garray;
7731         PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
7732         for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
7733           PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
7734           for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
7735             AA[k/bs] = 0;
7736             AJ[cidx] = garray[cols[k]]/bs;
7737           }
7738           nc = ncols/bs;
7739           PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
7740           for (int ii=0; ii<bs; ii++) { // rows in block
7741             PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
7742             for (int k=0; k<ncols; k += bs) {
7743               for (int jj=0; jj<bs; jj++) { // cols in block
7744                 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
7745               }
7746             }
7747             PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
7748           }
7749           grow = Istart/bs + brow/bs;
7750           PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
7751         }
7752       }
7753       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7754       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7755     } else {
7756       const PetscScalar *vals;
7757       const PetscInt    *idx;
7758       PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
7759       old_bs:
7760       /*
7761        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7762        */
7763       PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
7764       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7765       if (isseqaij) {
7766         PetscInt max_d_nnz;
7767         /*
7768          Determine exact preallocation count for (sequential) scalar matrix
7769          */
7770         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
7771         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7772         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7773         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
7774           PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7775         }
7776         PetscCall(PetscFree3(w0,w1,w2));
7777       } else if (ismpiaij) {
7778         Mat            Daij,Oaij;
7779         const PetscInt *garray;
7780         PetscInt       max_d_nnz;
7781         PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
7782         /*
7783          Determine exact preallocation count for diagonal block portion of scalar matrix
7784          */
7785         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
7786         max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
7787         PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
7788         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7789           PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
7790         }
7791         PetscCall(PetscFree3(w0,w1,w2));
7792         /*
7793          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7794          */
7795         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7796           o_nnz[jj] = 0;
7797           for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
7798             PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7799             o_nnz[jj] += ncols;
7800             PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
7801           }
7802           if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
7803         }
7804       } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
7805       /* get scalar copy (norms) of matrix */
7806       PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
7807       PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
7808       PetscCall(PetscFree2(d_nnz,o_nnz));
7809       for (Ii = Istart; Ii < Iend; Ii++) {
7810         PetscInt dest_row = Ii/bs;
7811         PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
7812         for (jj=0; jj<ncols; jj++) {
7813           PetscInt    dest_col = idx[jj]/bs;
7814           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7815           PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
7816         }
7817         PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
7818       }
7819       PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
7820       PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
7821     }
7822   } else {
7823     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7824      procedure via MatAbs API */
7825     /* just copy scalar matrix & abs() */
7826     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7827     if (isseqaij) { a = Gmat; b = NULL; }
7828     else {
7829       Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7830       a = d->A; b = d->B;
7831     }
7832     /* abs */
7833     for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7834       MatInfo     info;
7835       PetscScalar *avals;
7836       PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
7837       PetscCall(MatSeqAIJGetArray(c,&avals));
7838       for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7839       PetscCall(MatSeqAIJRestoreArray(c,&avals));
7840     }
7841   }
7842   if (symmetrize) {
7843     PetscBool isset,issym;
7844     PetscCall(MatIsSymmetricKnown(Amat,&isset,&issym));
7845     if (!isset || !issym) {
7846       Mat matTrans;
7847       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7848       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7849       PetscCall(MatDestroy(&matTrans));
7850     }
7851     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7852   } else {
7853     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7854   }
7855   if (scale) {
7856     /* scale c for all diagonal values = 1 or -1 */
7857     Vec               diag;
7858     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7859     PetscCall(MatGetDiagonal(Gmat, diag));
7860     PetscCall(VecReciprocal(diag));
7861     PetscCall(VecSqrtAbs(diag));
7862     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7863     PetscCall(VecDestroy(&diag));
7864   }
7865   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7866   *a_Gmat = Gmat;
7867   PetscFunctionReturn(0);
7868 }
7869 
7870 /* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - create a filtered copy of a scalar graph, dropping entries with small absolute values
     A negative vfilter would filter nothing, so this routine should not be called in that case.

   Collective on Mat

   Input Parameters:
+   Gmat - the graph
-   vfilter - threshold parameter [0,1)

   Output Parameter:
.   filteredG - output filtered scalar graph

   Level: developer

   Notes:
    This is called before the graph coarseners are called.
    This could go into Mat, move 'symm' to GAMG

.seealso: `PCGAMGSetThreshold()`
@*/
7892 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7893 {
7894   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7895   Mat               tGmat;
7896   MPI_Comm          comm;
7897   const PetscScalar *vals;
7898   const PetscInt    *idx;
7899   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7900   MatScalar         *AA; // this is checked in graph
7901   PetscBool         isseqaij;
7902   Mat               a, b, c;
7903   MatType           jtype;
7904 
7905   PetscFunctionBegin;
7906   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7907   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7908   PetscCall(MatGetType(Gmat,&jtype));
7909   PetscCall(MatCreate(comm, &tGmat));
7910   PetscCall(MatSetType(tGmat, jtype));
7911 
7912   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7913                Also, if the matrix is symmetric, can we skip this
7914                operation? It can be very expensive on large matrices. */
7915 
7916   // global sizes
7917   PetscCall(MatGetSize(Gmat, &MM, &NN));
7918   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7919   nloc = Iend - Istart;
7920   PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
7921   if (isseqaij) { a = Gmat; b = NULL; }
7922   else {
7923     Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
7924     a = d->A; b = d->B;
7925     garray = d->garray;
7926   }
7927   /* Determine upper bound on non-zeros needed in new filtered matrix */
7928   for (PetscInt row=0; row < nloc; row++) {
7929     PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
7930     d_nnz[row] = ncols;
7931     if (ncols>maxcols) maxcols=ncols;
7932     PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
7933   }
7934   if (b) {
7935     for (PetscInt row=0; row < nloc; row++) {
7936       PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
7937       o_nnz[row] = ncols;
7938       if (ncols>maxcols) maxcols=ncols;
7939       PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
7940     }
7941   }
7942   PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
7943   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7944   PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
7945   PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
7946   PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
7947   PetscCall(PetscFree2(d_nnz,o_nnz));
7948   //
7949   PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
7950   nnz0 = nnz1 = 0;
7951   for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7952     for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
7953       PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
7954       for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
7955         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7956         if (PetscRealPart(sv) > vfilter) {
7957           nnz1++;
7958           PetscInt cid = idx[jj] + Istart; //diag
7959           if (c!=a) cid = garray[idx[jj]];
7960           AA[ncol_row] = vals[jj];
7961           AJ[ncol_row] = cid;
7962           ncol_row++;
7963         }
7964       }
7965       PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
7966       PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
7967     }
7968   }
7969   PetscCall(PetscFree2(AA,AJ));
7970   PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
7971   PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
7972   PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */
7973 
7974   PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
7975                       (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
7976                       (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));
7977 
7978   *filteredG = tGmat;
7979   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7980   PetscFunctionReturn(0);
7981 }
7982 
7983 /*
7984     Special version for direct calls from Fortran
7985 */
7986 #include <petsc/private/fortranimpl.h>
7987 
7988 /* Change these macros so can be used in void function */
7989 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7990 #undef  PetscCall
7991 #define PetscCall(...) do {                                                                    \
7992     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7993     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7994       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7995       return;                                                                                  \
7996     }                                                                                          \
7997   } while (0)
7998 
7999 #undef SETERRQ
8000 #define SETERRQ(comm,ierr,...) do {                                                            \
8001     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
8002     return;                                                                                    \
8003   } while (0)
8004 
8005 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8006 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8007 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8008 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8009 #else
8010 #endif
/*
   matsetvaluesmpiaij_ - Fortran-callable direct implementation of MatSetValues()
   for MATMPIAIJ matrices, bypassing the usual Fortran interface layer for speed.

   All scalar arguments arrive as pointers (Fortran pass-by-reference). Errors
   are reported through *_ierr via the PetscCall()/SETERRQ() macros redefined
   above this function, since a void routine cannot return an error code.

   NOTE: many local variable names below (rp1/rp2, ap1/ap2, rmax1/rmax2,
   low1/high1, lastcol1/lastcol2, inserted, N, t, _i, ii, bm, am, nonew, ...)
   are fixed by the MatSetValues_SeqAIJ_A_Private()/_B_Private() macros and
   must not be renamed.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat          mat  = *mmat;
  PetscInt     m    = *mm, n = *mn;
  InsertMode   addv = *maddv;
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;

  MatCheckPreallocated(mat,1);
  /* ADD_VALUES and INSERT_VALUES may not be mixed within one assembly cycle */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;          /* diagonal block */
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;          /* off-diagonal block */
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    /* Insert each row: locally owned rows go straight into the A (diagonal) or
       B (off-diagonal) block; off-process rows are buffered in the stash and
       communicated at assembly time. */
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row index means "skip this row" */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the per-row search state (row pointers,
           lengths, binary-search bounds) consumed by the insertion macros,
           with the "1" set for block A and the "2" set for block B */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* v is either row-major or column-major depending on the matrix option */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue; /* negative column index means "skip" */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* column falls in the off-diagonal block */
            if (mat->was_assembled) {
              /* translate the global column to B's compacted local numbering */
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* column not present in B's current pattern: expand B back to
                   global column numbering so a new entry can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a; /* NOTE(review): reads b->a directly instead of MatSeqAIJGetArray(); presumably valid for the freshly rebuilt host B - confirm */
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* before first assembly B uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: buffer the values in the stash for assembly-time communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
8124 
8125 /* Undefining these here since they were redefined from their original definition above! No
8126  * other PETSc functions should be defined past this point, as it is impossible to recover the
8127  * original definitions */
8128 #undef PetscCall
8129 #undef SETERRQ
8130