xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 91e63d38360eb9bc922f79d792328cc4769c01ac)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
/*
   MatFindNonzeroRows_MPIAIJ - builds an index set (global numbering) of the locally
   owned rows that contain at least one stored entry with a nonzero VALUE in either
   the diagonal (A) or off-diagonal (B) block.

   Output: *keptrows is left NULL when no rank has a zero row (callers treat NULL
   as "keep all rows"); otherwise it is a parallel IS owning the malloc'ed indices.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* Pass 1: count the locally zero rows in cnt.  A row is zero when it has no
     stored entries at all, or when every stored entry equals 0.0 exactly. */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* row has a nonzero in the off-diagonal block */
    }
    cnt++; /* all stored values in this row are zero */
ok1:;
  }
  /* n0rows = total number of zero rows over all ranks; the early return below
     relies on this being a collective decision */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* Pass 2: collect the global indices of the locally nonzero rows */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
/*
   MatGetColumnReductions_MPIAIJ - computes a per-column reduction (1/2/inf norm,
   sum or mean of real/imaginary parts) over all rows of the parallel matrix.

   Output: reductions - array of length N (global columns), same values on all ranks
   (combined with an Allreduce).  For the MEAN variants the column sums are divided
   by the GLOBAL number of rows m.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray maps local B columns to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); /* zeroed accumulator over ALL global columns */
  /* NOTE(review): these paired get/restore calls appear to exist only for their
     side effect — presumably forcing up-to-date host copies of the values before
     a_aij->a / b_aij->a are read directly below; confirm against the GPU backends */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  /* Each branch walks the raw CSR storage of both blocks: i[] row pointers bound
     the loops (i[nrows] = total nonzeros), j[] gives the column, a[] the value. */
  if (type == NORM_2) {
    /* accumulate |a_ij|^2 (for complex z, |z*z| == |z|^2); sqrt applied at the end */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-rank partials: max for the inf-norm, sum for everything else */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m; /* mean over the global row count */
  }
  PetscFunctionReturn(0);
}
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i; /* number of off-diagonal (B) columns actually present */

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table variant: stores only the n columns present.  Keys and values are
     shifted by +1 because the table treats 0 as "absent". */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Dense variant: zeroed array over all N global columns; entry 0 means
     "absent", otherwise it holds local index + 1 */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
316 
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds one (row,col,value) entry into
   the diagonal block A.  Not a function: expands inline inside MatSetValues_MPIAIJ
   and relies on that function's locals (rp1/ap1 = column/value arrays of the row,
   low1/high1/nrow1/rmax1/lastcol1 = search window state, plus a, A, am, aa, ai,
   aj, aimax, ailen, nonew, ignorezeroentries, ierr, _i, t, N).
   Search strategy: narrow the window by bisection while it is larger than 5
   entries, then scan linearly.  If the column exists, add/overwrite per addv;
   otherwise grow the row (reallocating if needed) and shift later entries up,
   unless new nonzeros are suppressed (nonew) or the zero value is ignorable.
   orow/ocol are the original global indices, used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
353 
/*
   MatSetValues_SeqAIJ_B_Private - off-diagonal-block (B) twin of
   MatSetValues_SeqAIJ_A_Private; identical logic on the *2-suffixed locals
   (rp2/ap2/low2/high2/nrow2/rmax2/lastcol2 and b, B, bm, ba, bi, bj, bimax,
   bilen).  One difference: the ignorezeroentries shortcut has no "row != col"
   test, since no entry of B is a diagonal entry.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
389 
/*
   MatSetValuesRow_MPIAIJ - overwrites all stored values of one locally owned row.
   v must supply exactly one value per stored nonzero of the row, ordered by
   increasing GLOBAL column: B-entries left of the diagonal block, then all
   A-entries, then B-entries right of the diagonal block.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;
  PetscScalar    *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  /* diag = first owned row; for a square matrix this equals the first column of
     the diagonal block, so B columns with global index <= diag lie left of it */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag; /* global -> local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break; /* first B entry past the diagonal block */
  }
  /* copy the l left-of-diagonal values into the front of B's row */
  if (l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
    ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
  }

  /* right of diagonal part */
  /* remaining values (after the l left entries and the A-row) go to the back of B's row */
  if (b->i[row+1]-b->i[row]-l) {
    ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
    ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
428 
429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
430 {
431   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
432   PetscScalar    value = 0.0;
433   PetscErrorCode ierr;
434   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
435   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
436   PetscBool      roworiented = aij->roworiented;
437 
438   /* Some Variables required in the macro */
439   Mat        A                    = aij->A;
440   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
441   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
442   PetscBool  ignorezeroentries    = a->ignorezeroentries;
443   Mat        B                    = aij->B;
444   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
445   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
446   MatScalar  *aa,*ba;
447   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
448   PetscInt   nonew;
449   MatScalar  *ap1,*ap2;
450 
451   PetscFunctionBegin;
452   ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
453   ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j=0; j<n; j++) {
475         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
481         } else if (in[j] < 0) continue;
482         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
510               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
511                 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
512               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
513             }
514           } else col = in[j];
515           nonew = b->nonew;
516           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
517         }
518       }
519     } else {
520       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
521       if (!aij->donotstash) {
522         mat->assembled = PETSC_FALSE;
523         if (roworiented) {
524           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
532   ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
541 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
542 {
543   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
544   Mat            A           = aij->A; /* diagonal part of the matrix */
545   Mat            B           = aij->B; /* offdiagonal part of the matrix */
546   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
548   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
549   PetscInt       *ailen      = a->ilen,*aj = a->j;
550   PetscInt       *bilen      = b->ilen,*bj = b->j;
551   PetscInt       am          = aij->A->rmap->n,j;
552   PetscInt       diag_so_far = 0,dnz;
553   PetscInt       offd_so_far = 0,onz;
554 
555   PetscFunctionBegin;
556   /* Iterate over all rows of the matrix */
557   for (j=0; j<am; j++) {
558     dnz = onz = 0;
559     /*  Iterate over all non-zero columns of the current row */
560     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
561       /* If column is in the diagonal */
562       if (mat_j[col] >= cstart && mat_j[col] < cend) {
563         aj[diag_so_far++] = mat_j[col] - cstart;
564         dnz++;
565       } else { /* off-diagonal entries */
566         bj[offd_so_far++] = mat_j[col];
567         onz++;
568       }
569     }
570     ailen[j] = dnz;
571     bilen[j] = onz;
572   }
573   PetscFunctionReturn(0);
574 }
575 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
583 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
584 {
585   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
586   Mat            A      = aij->A; /* diagonal part of the matrix */
587   Mat            B      = aij->B; /* offdiagonal part of the matrix */
588   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
589   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
590   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
591   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
592   PetscInt       *ailen = a->ilen,*aj = a->j;
593   PetscInt       *bilen = b->ilen,*bj = b->j;
594   PetscInt       am     = aij->A->rmap->n,j;
595   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
596   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
597   PetscScalar    *aa = a->a,*ba = b->a;
598 
599   PetscFunctionBegin;
600   /* Iterate over all rows of the matrix */
601   for (j=0; j<am; j++) {
602     dnz_row = onz_row = 0;
603     rowstart_offd = full_offd_i[j];
604     rowstart_diag = full_diag_i[j];
605     /*  Iterate over all non-zero columns of the current row */
606     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
607       /* If column is in the diagonal */
608       if (mat_j[col] >= cstart && mat_j[col] < cend) {
609         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
610         aa[rowstart_diag+dnz_row] = mat_a[col];
611         dnz_row++;
612       } else { /* off-diagonal entries */
613         bj[rowstart_offd+onz_row] = mat_j[col];
614         ba[rowstart_offd+onz_row] = mat_a[col];
615         onz_row++;
616       }
617     }
618     ailen[j] = dnz_row;
619     bilen[j] = onz_row;
620   }
621   PetscFunctionReturn(0);
622 }
623 
/*
   MatGetValues_MPIAIJ - retrieves an m x n block of values (row-oriented into v).
   Only LOCALLY OWNED rows may be requested; negative row/column indices are
   skipped, and columns not stored in the off-diagonal block read back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lives in the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global -> local via the colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--; /* colmap entries are shifted by +1; 0 means "absent" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray cross-check guards against a stale/mismatched colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
663 
664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
665 {
666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
667   PetscErrorCode ierr;
668   PetscInt       nstash,reallocs;
669 
670   PetscFunctionBegin;
671   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
672 
673   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
/*
  Completes assembly of a parallel AIJ matrix: drains the stash of off-process
  entries, assembles the diagonal (A) and off-diagonal (B) sequential blocks,
  coordinates disassembly across ranks, and (re)builds the multiply machinery.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive the entries other ranks stashed for our rows, one message at a time */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was previously assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap/garray and the Mvctx scatter used by MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row workspace and diagonal are stale after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
/*
  Zeros the (globally numbered) rows[] of A, optionally placing diag on the
  diagonal and fixing the right-hand side b so that the solution keeps the
  values given in x for those rows.  Collective on A.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    /* b_i = diag * x_i for each zeroed local row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the blocks' nonzero states so we can detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the A block, so let it set diag */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* nnw* save the blocks' nonew setting; nnz* record keepnonzeropattern */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    /* insert diag at the global (row,row) position, possibly creating a new nonzero */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the blocks' original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
846 
/*
  Zeros both the rows and columns rows[] (global numbering) of A, optionally
  placing diag on the diagonal and adjusting b so the solution keeps the
  values of x at those locations.  Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over ghost columns: 1 marks a column being zeroed somewhere */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* gather ghost values of x so eliminated-column contributions can be moved into b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row index back to local row number */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: fold its contribution into b, then drop it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
966 
/*
  yy = A*xx.  Overlaps communication of ghost values with the local
  (diagonal-block) product: scatter is started, the diagonal block is
  multiplied, the scatter is finished, and the off-diagonal contribution
  is added in.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  /* start gathering ghost entries of xx into a->lvec ... */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* ... compute the local part while the communication is in flight ... */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* ... then add the off-diagonal contribution: yy += B*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
983 
984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
/*
  zz = A*xx + yy.  Same communication/computation overlap as MatMult_MPIAIJ:
  the ghost scatter runs concurrently with the diagonal-block multadd.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  /* start gathering ghost entries of xx while computing zz = A_diag*xx + yy locally */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* add the off-diagonal contribution: zz += B*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1007 
/*
  yy = A^T*xx.  Transpose products send data in the reverse direction:
  the off-diagonal transpose product is computed into lvec first, then
  scattered (reverse, additive) onto the owning ranks.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1023 
1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1025 {
1026   MPI_Comm       comm;
1027   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1028   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1029   IS             Me,Notme;
1030   PetscErrorCode ierr;
1031   PetscInt       M,N,first,last,*notme,i;
1032   PetscBool      lf;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1039   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
/*
  zz = A^T*xx + yy.  Mirrors MatMultTranspose_MPIAIJ: the off-diagonal
  transpose product goes into lvec and is then reverse-scattered additively.
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1090 
1091 /*
1092   This only works correctly for square matrices where the subblock A->A is the
1093    diagonal block
1094 */
1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1102   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1103   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1114   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* each PetscFreeN below must mirror the PetscMallocN of the same arity used at allocation */
  ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr);
  ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr);
  ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr);
  ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1133 
/*
  Destructor for MATMPIAIJ: releases the stash, sequential blocks, column map,
  ghost-vector machinery, COO state, and removes every composed method so the
  object can be retyped or freed cleanly.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is either a PetscTable or a plain array, depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);

  /* Free COO */
  ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  /* detach every composed method installed at creation/convert time */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above; this second call is a harmless duplicate */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1209 
/*
  Writes the parallel matrix to a binary viewer in the PETSc binary matrix
  format: header (classid, M, N, total nz), per-row lengths, column indices,
  then values.  Each local row is emitted with its entries merged in global
  column order: off-diagonal entries left of the diagonal block, then the
  diagonal block, then the remaining off-diagonal entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray; /* maps B's local column indices to global columns */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count; summed over ranks for the header */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3] */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths  */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* off-diagonal entries with global column before the diagonal block */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block entries, shifted to global numbering */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal entries (global column after the diagonal block) */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* same B/A/B interleaving as the column-index pass above */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1285 
1286 #include <petscdraw.h>
1287 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1288 {
1289   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank = aij->rank,size = aij->size;
1292   PetscBool         isdraw,iascii,isbinary;
1293   PetscViewer       sviewer;
1294   PetscViewerFormat format;
1295 
1296   PetscFunctionBegin;
1297   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1298   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1300   if (iascii) {
1301     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1302     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1303       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1304       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1305       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1306       for (i=0; i<(PetscInt)size; i++) {
1307         nmax = PetscMax(nmax,nz[i]);
1308         nmin = PetscMin(nmin,nz[i]);
1309         navg += nz[i];
1310       }
1311       ierr = PetscFree(nz);CHKERRQ(ierr);
1312       navg = navg/size;
1313       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr);
1314       PetscFunctionReturn(0);
1315     }
1316     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1317     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1318       MatInfo   info;
1319       PetscInt *inodes=NULL;
1320 
1321       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1322       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1323       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1325       if (!inodes) {
1326         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1327                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1328       } else {
1329         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1330                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1331       }
1332       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1334       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1336       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1337       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1338       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1339       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1340       PetscFunctionReturn(0);
1341     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1342       PetscInt inodecount,inodelimit,*inodes;
1343       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1344       if (inodes) {
1345         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr);
1346       } else {
1347         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1348       }
1349       PetscFunctionReturn(0);
1350     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1351       PetscFunctionReturn(0);
1352     }
1353   } else if (isbinary) {
1354     if (size == 1) {
1355       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1356       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1357     } else {
1358       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1359     }
1360     PetscFunctionReturn(0);
1361   } else if (iascii && size == 1) {
1362     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1363     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1364     PetscFunctionReturn(0);
1365   } else if (isdraw) {
1366     PetscDraw draw;
1367     PetscBool isnull;
1368     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1369     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1370     if (isnull) PetscFunctionReturn(0);
1371   }
1372 
1373   { /* assemble the entire matrix onto first processor */
1374     Mat A = NULL, Av;
1375     IS  isrow,iscol;
1376 
1377     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1378     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1379     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1380     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1381 /*  The commented code uses MatCreateSubMatrices instead */
1382 /*
1383     Mat *AA, A = NULL, Av;
1384     IS  isrow,iscol;
1385 
1386     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1387     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1388     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1389     if (rank == 0) {
1390        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1391        A    = AA[0];
1392        Av   = AA[0];
1393     }
1394     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1395 */
1396     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1397     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1398     /*
1399        Everyone has to call to draw the matrix since the graphics waits are
1400        synchronized across all processors that share the PetscDraw object
1401     */
1402     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1403     if (rank == 0) {
1404       if (((PetscObject)mat)->name) {
1405         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1406       }
1407       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1410     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1411     ierr = MatDestroy(&A);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1417 {
1418   PetscErrorCode ierr;
1419   PetscBool      iascii,isdraw,issocket,isbinary;
1420 
1421   PetscFunctionBegin;
1422   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1426   if (iascii || isdraw || isbinary || issocket) {
1427     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;  /* work vector: bb adjusted by off-process (B block) contributions */
  PetscBool      hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is applied purely to the local diagonal block; delegate and return */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* A work rhs is needed whenever xx feeds back into the right-hand side:
     more than one outer iteration, a nonzero initial guess (note ~flag bit test),
     or the Eisenstat variant below */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* With a zero initial guess the first iteration needs no ghost update of xx */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate xx */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* Eisenstat's trick: one local backward sweep, rebuild the rhs from the diagonal
       and off-process parts, then one local forward sweep; results are summed into xx */
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily cache the diagonal of the full matrix for reuse in later calls */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot/factorization error recorded by the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1531 
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries per permuted row, then
     broadcast the counts (tdnnz/tonnz) to the processes that will own the rows */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  /* Build the permuted matrix by translating each entry's (row,col) and inserting */
  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned in this routine, so this destroy appears to be
     dead code — confirm whether a parallel-colp conversion path was removed */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1638 
1639 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1640 {
1641   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1646   if (ghosts) *ghosts = aij->garray;
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1651 {
1652   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1653   Mat            A    = mat->A,B = mat->B;
1654   PetscErrorCode ierr;
1655   PetscLogDouble isend[5],irecv[5];
1656 
1657   PetscFunctionBegin;
1658   info->block_size = 1.0;
1659   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1660 
1661   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1662   isend[3] = info->memory;  isend[4] = info->mallocs;
1663 
1664   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1665 
1666   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1667   isend[3] += info->memory;  isend[4] += info->mallocs;
1668   if (flag == MAT_LOCAL) {
1669     info->nz_used      = isend[0];
1670     info->nz_allocated = isend[1];
1671     info->nz_unneeded  = isend[2];
1672     info->memory       = isend[3];
1673     info->mallocs      = isend[4];
1674   } else if (flag == MAT_GLOBAL_MAX) {
1675     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1676 
1677     info->nz_used      = irecv[0];
1678     info->nz_allocated = irecv[1];
1679     info->nz_unneeded  = irecv[2];
1680     info->memory       = irecv[3];
1681     info->mallocs      = irecv[4];
1682   } else if (flag == MAT_GLOBAL_SUM) {
1683     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1684 
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   }
1691   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1692   info->fill_ratio_needed = 0;
1693   info->factor_mallocs    = 0;
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1698 {
1699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   switch (op) {
1704   case MAT_NEW_NONZERO_LOCATIONS:
1705   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1706   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1707   case MAT_KEEP_NONZERO_PATTERN:
1708   case MAT_NEW_NONZERO_LOCATION_ERR:
1709   case MAT_USE_INODES:
1710   case MAT_IGNORE_ZERO_ENTRIES:
1711   case MAT_FORM_EXPLICIT_TRANSPOSE:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_ROW_ORIENTED:
1717     MatCheckPreallocated(A,1);
1718     a->roworiented = flg;
1719 
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_FORCE_DIAGONAL_ENTRIES:
1724   case MAT_SORTED_FULL:
1725     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1731   case MAT_SPD:
1732   case MAT_SYMMETRIC:
1733   case MAT_STRUCTURALLY_SYMMETRIC:
1734   case MAT_HERMITIAN:
1735   case MAT_SYMMETRY_ETERNAL:
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* Only one row may be active at a time; MatRestoreRow_MPIAIJ() clears the flag */
  PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* Only request the pieces (values and/or columns) that the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps compressed B columns to global column indices */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column lies left of the
         diagonal block; merged layout is [B-left | all of A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's columns are local to the diagonal block; shift by cstart to globalize */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode  ierr;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  /* On one process the whole matrix lives in the diagonal block; delegate */
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr);
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce across ranks, then take sqrt */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column in tmp, sum across ranks, take max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal-block columns are local; shift by cstart to globalize */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns are compressed; garray maps them to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so the per-row sums need only a max reduction */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1907 
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* MAT_INITIAL_MATRIX, or in-place transpose (*matout == A): create B with the
     transposed sizes and a preallocation computed from A's column counts */
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* MAT_REUSE_MATRIX with a distinct B: fail if the pattern would have to grow */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed column indices of a->B to global indices via garray */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* each row of a->B becomes a column of values in B: insert as an ncol x 1 block */
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* for MAT_INPLACE_MATRIX, replace A's guts with B's instead of returning B */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1996 
1997 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1998 {
1999   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2000   Mat            a    = aij->A,b = aij->B;
2001   PetscErrorCode ierr;
2002   PetscInt       s1,s2,s3;
2003 
2004   PetscFunctionBegin;
2005   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2006   if (rr) {
2007     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2008     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2009     /* Overlap communication with computation. */
2010     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2011   }
2012   if (ll) {
2013     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2014     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2015     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2016   }
2017   /* scale  the diagonal block */
2018   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2019 
2020   if (rr) {
2021     /* Do a scatter end and then right scale the off-diagonal block */
2022     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2029 {
2030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2039 {
2040   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2041   Mat            a,b,c,d;
2042   PetscBool      flg;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   a = matA->A; b = matA->B;
2047   c = matB->A; d = matB->B;
2048 
2049   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2050   if (flg) {
2051     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2052   }
2053   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2058 {
2059   PetscErrorCode ierr;
2060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2061   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2062 
2063   PetscFunctionBegin;
2064   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2065   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2066     /* because of the column compression in the off-processor part of the matrix a->B,
2067        the number of columns in a->B and b->B may be different, hence we cannot call
2068        the MatCopy() directly on the two parts. If need be, we can provide a more
2069        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2070        then copying the submatrices */
2071     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2072   } else {
2073     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2074     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2075   }
2076   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Default setup: preallocate with PETSC_DEFAULT sizes so the matrix is usable
     even when the user never called MatMPIAIJSetPreallocation() explicitly */
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2088 
2089 /*
2090    Computes the number of nonzeros per row needed for preallocation when X and Y
2091    have different nonzero structure.
2092 */
2093 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2094 {
2095   PetscInt       i,j,k,nzx,nzy;
2096 
2097   PetscFunctionBegin;
2098   /* Set the number of nonzeros in the new matrix */
2099   for (i=0; i<m; i++) {
2100     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2101     nzx = xi[i+1] - xi[i];
2102     nzy = yi[i+1] - yi[i];
2103     nnz[i] = 0;
2104     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2105       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2106       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2107       nnz[i]++;
2108     }
2109     for (; k<nzy; k++) nnz[i]++;
2110   }
2111   PetscFunctionReturn(0);
2112 }
2113 
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N; /* NOTE(review): assumes Y is sequential so rmap->N equals the local row count -- confirm */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2126 
/* Y = a*X + Y.  The three MatStructure cases take increasingly expensive paths. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* blockwise AXPY on the diagonal and off-diagonal parts */
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* patterns differ: build a new matrix B preallocated for the union pattern,
       compute the sum into it, then replace Y's innards with B's */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* NOTE(review): yy->A/yy->B are sequential blocks, so rmap->N here is the local row count -- confirm */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    /* off-diagonal blocks are column-compressed; garray maps to global columns */
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* transplant B's data into Y, destroying Y's old data and the B shell */
    ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2158 
2159 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2160 
/* Replace every stored entry by its complex conjugate; a no-op in real builds */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2175 
/* Keep only the real part of every stored entry, in both the diagonal (A)
   and off-diagonal (B) blocks */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2186 
/* Keep only the imaginary part of every stored entry, in both the diagonal (A)
   and off-diagonal (B) blocks */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2197 
2198 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2199 {
2200   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode    ierr;
2202   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2203   PetscScalar       *va,*vv;
2204   Vec               vB,vA;
2205   const PetscScalar *vb;
2206 
2207   PetscFunctionBegin;
2208   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2209   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2210 
2211   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2212   if (idx) {
2213     for (i=0; i<m; i++) {
2214       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2215     }
2216   }
2217 
2218   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2219   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2220   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2221 
2222   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2223   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2224   for (i=0; i<m; i++) {
2225     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2226       vv[i] = vb[i];
2227       if (idx) idx[i] = a->garray[idxb[i]];
2228     } else {
2229       vv[i] = va[i];
2230       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2231         idx[i] = a->garray[idxb[i]];
2232     }
2233   }
2234   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2235   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2236   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2237   ierr = PetscFree(idxb);CHKERRQ(ierr);
2238   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2239   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
/* v[i] = min_j |A(i,j)| over ALL global columns of locally owned row i,
   counting columns not stored in the sparse pattern as implicit 0.0 entries;
   idx[i] (optional) receives the global column of the winning entry. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* rows but no owned columns: every entry is an implicit 0.0 */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, so the row min |value| starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons below mix the global column `col` with the
         loop counter j and cstart -- verify this hole search against a test case */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal results; ties go to the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2351 
/* v[i] = min_j Re(A(i,j)) over ALL global columns of locally owned row i,
   counting columns not stored in the sparse pattern as implicit 0.0 entries;
   idx[i] (optional) receives the global column of the winning entry. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* rows but no owned columns: minimum of an empty row is +infinity */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, so the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons below mix the global column `col` with the
         loop counter j and cstart -- verify this hole search against a test case */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal results; ties go to the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2460 
/* v[i] = max_j Re(A(i,j)) over ALL global columns of locally owned row i,
   counting columns not stored in the sparse pattern as implicit 0.0 entries;
   idx[i] (optional) receives the global column of the winning entry. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* global column of each compressed B column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; compute directly into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* rows but no owned columns: maximum of an empty row is -infinity */
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the comparisons below mix the global column `col` with the
         loop counter j and cstart -- verify this hole search against a test case */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a larger real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal results; ties go to the smaller global column */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2569 
/* Gather the global nonzero structure (no values) into a sequential matrix */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  /* steal the single matrix out of the returned array, then free the array container */
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2581 
/* The block diagonal lives entirely in the diagonal part a->A, so delegate
   and propagate any factorization-error status back to the parallel matrix */
PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}
2592 
/* Fill the matrix with random values and assemble it */
static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* NOTE(review): before assembly B's columns are skipped over the owned
       column range [cstart,cend) -- presumably because B then still uses global
       column indices; confirm against MatSetRandomSkipColumnRange_SeqAIJ_Private */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2610 
2611 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2612 {
2613   PetscFunctionBegin;
2614   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2615   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 /*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2621 
2622    Collective on Mat
2623 
2624    Input Parameters:
2625 +    A - the matrix
2626 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2627 
2628  Level: advanced
2629 
2630 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  /* PetscTryMethod: dispatches to the type-specific implementation if the matrix
     type provides one, and silently does nothing otherwise */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2639 
/* Process runtime options specific to MATMPIAIJ */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode       ierr;
  PetscBool            sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  /* report the currently selected implementation as the option default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2655 
/* Y = Y + a*I, ensuring the diagonal is preallocated first */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* reserve one diagonal-block entry per row to hold the shift */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block has no stored entries: re-preallocate it for the diagonal,
       saving/restoring the nonew flag which the preallocation call overwrites */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2673 
2674 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2675 {
2676   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2677   PetscErrorCode ierr;
2678 
2679   PetscFunctionBegin;
2680   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2681   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2682   if (d) {
2683     PetscInt rstart;
2684     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2685     *d += rstart;
2686 
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
/* Variable-size diagonal blocks live entirely in the diagonal part a->A; delegate */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2700 
2701 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ: one slot per operation in _MatOps, in
   declaration order (the /*NN* / comments mark slot numbers).  NULL means the
   operation is unsupported or handled by a generic/default implementation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2851 
2852 /* ----------------------------------------------------------------------------------------*/
2853 
2854 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2855 {
2856   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2857   PetscErrorCode ierr;
2858 
2859   PetscFunctionBegin;
2860   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2861   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2866 {
2867   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2868   PetscErrorCode ierr;
2869 
2870   PetscFunctionBegin;
2871   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2872   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Preallocates the diagonal (A) and off-diagonal (B)
   sequential blocks of an MPIAIJ matrix.

   Input Parameters:
+  B     - the MPIAIJ matrix
.  d_nz  - nonzeros per row of the diagonal block (ignored when d_nnz is non-NULL)
.  d_nnz - per-row nonzero counts for the diagonal block, or NULL
.  o_nz  - nonzeros per row of the off-diagonal block (ignored when o_nnz is non-NULL)
-  o_nnz - per-row nonzero counts for the off-diagonal block, or NULL

   Any previously built column map, garray, local vector, and scatter context are
   destroyed here because the nonzero structure is being (re)defined.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* layouts must be finalized before local sizes B->rmap->n/B->cmap->n are valid */
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard stale assembled-structure data; representation of colmap depends on PETSC_USE_CTABLE */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on a single process there is no off-diagonal part, hence zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block is created only on the first preallocation call */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* matrix must be reassembled after a fresh preallocation */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2920 
/*
   MatResetPreallocation_MPIAIJ - Returns an MPIAIJ matrix to its just-preallocated
   state so it can be refilled, reusing the existing preallocation of the
   diagonal (A) and off-diagonal (B) sequential blocks.

   The column map, garray, local vector, and scatter context describe the
   previously assembled nonzero structure and are therefore destroyed.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* colmap representation depends on the PETSC_USE_CTABLE configure option */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* reset both sequential blocks in place (keeps their preallocated storage) */
  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2948 
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout and
   structure as matin, copying values according to cpvalues.

   Input Parameters:
+  matin    - matrix to duplicate
-  cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES, or MAT_SHARE_NONZERO_PATTERN
              (forwarded to MatDuplicate() of the sequential blocks)

   Output Parameter:
.  newmat   - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* deep-copy the global-to-local column map, whose representation depends on PETSC_USE_CTABLE */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* deep-copy the global indices of the off-diagonal block's columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  /* duplicate the diagonal and off-diagonal sequential blocks with the requested value policy */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* carry over composed functions (PetscObjectComposeFunction entries) */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3015 
3016 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3017 {
3018   PetscBool      isbinary, ishdf5;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3023   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3024   /* force binary viewer to load .info file if it has not yet done so */
3025   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3026   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3027   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3028   if (isbinary) {
3029     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3030   } else if (ishdf5) {
3031 #if defined(PETSC_HAVE_HDF5)
3032     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3033 #else
3034     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3035 #endif
3036   } else {
3037     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3038   }
3039   PetscFunctionReturn(0);
3040 }
3041 
/*
   MatLoad_MPIAIJ_Binary - Loads an MPIAIJ matrix from a PETSc binary viewer.

   Reads the 4-entry header (classid, M, N, nz), sets up the layouts, then reads
   the per-row lengths, column indices, and values collectively and hands the
   resulting CSR data to MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special on-disk format this loader does not understand */
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices (prefix sum converts lengths to CSR row offsets) */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* cross-check: the global sum of the row lengths must equal the header's nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3089 
/* Not scalable because of ISAllGather() unless getting all columns. */
/*
   ISGetSeqIS_Private - Builds a sequential index set containing the (globally
   gathered) entries of the parallel column index set iscol.

   If every process's iscol is exactly its own column ownership range (detected
   collectively below), the gather is skipped and an identity stride IS over all
   columns is returned instead.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt  start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* local stride matches this process's column ownership range exactly */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all processes must agree before the ISAllGather() can be skipped */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    /* preserve the block size of iscol on the gathered IS */
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
3127 
3128 /*
3129  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3130  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3131 
3132  Input Parameters:
3133    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3136    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3137            i.e., mat->cstart <= iscol[i] < mat->cend
3138  Output Parameter:
3139    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3140    iscol_o - sequential column index set for retrieving mat->B
3141    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3142  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices: exclusive prefix sum of ncols gives this process's offset into the global iscol */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  /* mark each selected column: x holds the global column index, cmap its position in iscol */
  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d: local column indices (ownership of idx transfers to the IS) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d: shift global row indices to local numbering */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries still at -1 were not selected by any process */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* caller takes ownership of cmap1 (returned as *garray) and must PetscFree() it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3240 
3241 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat            M = NULL;
  MPI_Comm       comm;
  IS             iscol_d,isrow_d,iscol_o;
  Mat            Asub = NULL,Bsub = NULL;
  PetscInt       n;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed there by the MAT_INITIAL_MATRIX pass) */
    ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
    if (n) {
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M; this takes ownership of Asub and destroys Bsub */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      /* match the (sorted) condensed column map subgarray against the original garray
         to keep only the iscol_o entries whose columns survived assembly */
      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3336 
/*
   MatCreateSubMatrix_MPIAIJ - Extracts a parallel submatrix of an MPIAIJ matrix.

   Dispatches to the cheapest applicable implementation:
   - isrow and iscol both match mat's distribution -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   - only isrow matches (and iscol_local is sorted) -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
   - otherwise -> general (nonscalable) path via a globally gathered iscol_local
   On MAT_REUSE_MATRIX, the path taken previously is recovered from objects composed
   on *newmat ("isrow_d", "SubIScol", or "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* all local indices fall inside this process's row ownership range */
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the choice of path must be collective: logical AND across all processes */
    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
        /* if not sorted, fall through to the general path below; iscol_local is reused there */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3441 
3442 /*@C
3443      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3444          and "off-diagonal" part of the matrix in CSR format.
3445 
3446    Collective
3447 
3448    Input Parameters:
3449 +  comm - MPI communicator
3450 .  A - "diagonal" portion of matrix
3451 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3452 -  garray - global index of B columns
3453 
3454    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3457 
3458    Notes:
3459        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3460        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3461 
3462 .seealso: MatCreateMPIAIJWithSplitArrays()
3463 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the diagonal blocks' local column counts */
  ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);

  ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Set A as diagonal portion of *mat (ownership of A transfers to *mat) */
  maij->A = A;

  /* translate B's local column indices to global indices, in place, via garray */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's i/j/a arrays without copying */
  ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  ierr = MatDestroy(&B);CHKERRQ(ierr);

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; assembly shrinks B to only the columns actually used */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3531 
3532 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3533 
/*
   Extracts the parallel submatrix mat[isrow,iscol] when the row distribution of the
   result matches that of mat. A sequential submatrix Msub is built first (only over
   the locally relevant columns, selected by iscol_sub), then its entries are mapped
   through iscmap into the parallel result. On MAT_INITIAL_MATRIX the intermediate
   objects are composed onto *newmat ("SubMatrix", "SubIScol", "Subcmap") so a later
   MAT_REUSE_MATRIX call can retrieve and refill them.
*/
3534 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3535 {
3536   PetscErrorCode ierr;
3537   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3538   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3539   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3540   Mat            M,Msub,B=a->B;
3541   MatScalar      *aa;
3542   Mat_SeqAIJ     *aij;
3543   PetscInt       *garray = a->garray,*colsub,Ncols;
3544   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3545   IS             iscol_sub,iscmap;
3546   const PetscInt *is_idx,*cmap;
3547   PetscBool      allcolumns=PETSC_FALSE;
3548   MPI_Comm       comm;
3549 
3550   PetscFunctionBegin;
3551   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3552   if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
3553     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3554     PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3555     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3556 
3557     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3558     PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3559 
3560     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3561     PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3562 
    /* Refill the existing sequential submatrix with the current values of mat */
3563     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3564 
3565   } else { /* call == MAT_INITIAL_MATRIX) */
3566     PetscBool flg;
3567 
3568     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3569     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3570 
3571     /* (1) iscol -> nonscalable iscol_local */
3572     /* Check for special case: each processor gets entire matrix columns */
3573     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3574     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every rank to take the fast path */
3575     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3576     if (allcolumns) {
3577       iscol_sub = iscol_local;
3578       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3579       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3580 
3581     } else {
3582       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3583       PetscInt *idx,*cmap1,k;
3584       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3585       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3586       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3587       count = 0;
3588       k     = 0;
      /* Merged walk: since both iscol_local and garray are sorted, a single index k
         advances monotonically through garray while scanning the requested columns.
         Columns that hit neither the diagonal range nor garray are dropped: this rank
         has no entries in them. */
3589       for (i=0; i<Ncols; i++) {
3590         j = is_idx[i];
3591         if (j >= cstart && j < cend) {
3592           /* diagonal part of mat */
3593           idx[count]     = j;
3594           cmap1[count++] = i; /* column index in submat */
3595         } else if (Bn) {
3596           /* off-diagonal part of mat */
3597           if (j == garray[k]) {
3598             idx[count]     = j;
3599             cmap1[count++] = i;  /* column index in submat */
3600           } else if (j > garray[k]) {
3601             while (j > garray[k] && k < Bn-1) k++;
3602             if (j == garray[k]) {
3603               idx[count]     = j;
3604               cmap1[count++] = i; /* column index in submat */
3605             }
3606           }
3607         }
3608       }
3609       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3610 
      /* iscol_sub: locally relevant global columns; iscmap: their positions in the submatrix */
3611       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3612       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3613       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3614 
3615       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3616     }
3617 
3618     /* (3) Create sequential Msub */
3619     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3620   }
3621 
3622   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3623   aij  = (Mat_SeqAIJ*)(Msub)->data;
3624   ii   = aij->i;
3625   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3626 
3627   /*
3628       m - number of local rows
3629       Ncols - number of columns (same on all processors)
3630       rstart - first row in new global matrix generated
3631   */
3632   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3633 
3634   if (call == MAT_INITIAL_MATRIX) {
3635     /* (4) Create parallel newmat */
3636     PetscMPIInt    rank,size;
3637     PetscInt       csize;
3638 
3639     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3640     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3641 
3642     /*
3643         Determine the number of non-zeros in the diagonal and off-diagonal
3644         portions of the matrix in order to do correct preallocation
3645     */
3646 
3647     /* first get start and end of "diagonal" columns */
3648     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3649     if (csize == PETSC_DECIDE) {
3650       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3651       if (mglobal == Ncols) { /* square matrix */
3652         nlocal = m;
3653       } else {
        /* default PETSc-style split of Ncols over size ranks */
3654         nlocal = Ncols/size + ((Ncols % size) > rank);
3655       }
3656     } else {
3657       nlocal = csize;
3658     }
    /* prefix sum of local column counts gives this rank's [rstart,rend) column range */
3659     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3660     rstart = rend - nlocal;
3661     PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3662 
3663     /* next, compute all the lengths */
    /* jj walks Msub's column indices once; cmap maps them to global submatrix columns */
3664     jj    = aij->j;
3665     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3666     olens = dlens + m;
3667     for (i=0; i<m; i++) {
3668       jend = ii[i+1] - ii[i];
3669       olen = 0;
3670       dlen = 0;
3671       for (j=0; j<jend; j++) {
3672         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3673         else dlen++;
3674         jj++;
3675       }
3676       olens[i] = olen;
3677       dlens[i] = dlen;
3678     }
3679 
3680     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3681     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3682 
3683     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3684     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3685     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3686     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3687     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3688     ierr = PetscFree(dlens);CHKERRQ(ierr);
3689 
3690   } else { /* call == MAT_REUSE_MATRIX */
3691     M    = *newmat;
3692     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3693     PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3694     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3695     /*
3696          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3697        rather than the slower MatSetValues().
3698     */
3699     M->was_assembled = PETSC_TRUE;
3700     M->assembled     = PETSC_FALSE;
3701   }
3702 
3703   /* (5) Set values of Msub to *newmat */
3704   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3705   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3706 
  /* Insert row by row, translating Msub's local columns to global ones via cmap;
     jj and aa advance through Msub's CSR arrays as rows are consumed */
3707   jj   = aij->j;
3708   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3709   for (i=0; i<m; i++) {
3710     row = rstart + i;
3711     nz  = ii[i+1] - ii[i];
3712     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3713     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3714     jj += nz; aa += nz;
3715   }
3716   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3717   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3718 
3719   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3720   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721 
3722   ierr = PetscFree(colsub);CHKERRQ(ierr);
3723 
3724   /* save Msub, iscol_sub and iscmap used in processor for next request */
3725   if (call == MAT_INITIAL_MATRIX) {
3726     *newmat = M;
    /* PetscObjectCompose takes its own reference, so the local references are dropped */
3727     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3728     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3729 
3730     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3731     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3732 
3733     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3734     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3735 
3736     if (iscol_local) {
3737       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3738       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3739     }
3740   }
3741   PetscFunctionReturn(0);
3742 }
3743 
3744 /*
3745     Not great since it makes two copies of the submatrix, first an SeqAIJ
3746   in local and then by concatenating the local matrices the end result.
3747   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3748 
3749   Note: This requires a sequential iscol with all indices.
3750 */
3751 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3752 {
3753   PetscErrorCode ierr;
3754   PetscMPIInt    rank,size;
3755   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3756   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3757   Mat            M,Mreuse;
3758   MatScalar      *aa,*vwork;
3759   MPI_Comm       comm;
3760   Mat_SeqAIJ     *aij;
3761   PetscBool      colflag,allcolumns=PETSC_FALSE;
3762 
3763   PetscFunctionBegin;
  /* Extract mat[isrow,iscol]: first build a full sequential copy Mreuse of the
     requested rows/columns on each rank, then distribute it into the parallel
     result M. "nonscalable" because iscol must hold all requested columns
     sequentially on every rank (see the comment above this function). */
3764   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3765   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3766   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3767 
3768   /* Check for special case: each processor gets entire matrix columns */
3769   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3770   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3771   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the fast path is only valid if every rank qualifies */
3772   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3773 
3774   if (call ==  MAT_REUSE_MATRIX) {
    /* Recover the sequential submatrix saved by a previous MAT_INITIAL_MATRIX call and refill it */
3775     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3776     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3777     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3778   } else {
3779     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3780   }
3781 
3782   /*
3783       m - number of local rows
3784       n - number of columns (same on all processors)
3785       rstart - first row in new global matrix generated
3786   */
3787   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3788   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3789   if (call == MAT_INITIAL_MATRIX) {
3790     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3791     ii  = aij->i;
3792     jj  = aij->j;
3793 
3794     /*
3795         Determine the number of non-zeros in the diagonal and off-diagonal
3796         portions of the matrix in order to do correct preallocation
3797     */
3798 
3799     /* first get start and end of "diagonal" columns */
3800     if (csize == PETSC_DECIDE) {
3801       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3802       if (mglobal == n) { /* square matrix */
3803         nlocal = m;
3804       } else {
        /* default PETSc-style split of n columns over size ranks */
3805         nlocal = n/size + ((n % size) > rank);
3806       }
3807     } else {
3808       nlocal = csize;
3809     }
    /* prefix sum of local column counts gives this rank's [rstart,rend) column range */
3810     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3811     rstart = rend - nlocal;
3812     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3813 
3814     /* next, compute all the lengths */
    /* single pass over Mreuse's CSR column array; jj advances across rows */
3815     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3816     olens = dlens + m;
3817     for (i=0; i<m; i++) {
3818       jend = ii[i+1] - ii[i];
3819       olen = 0;
3820       dlen = 0;
3821       for (j=0; j<jend; j++) {
3822         if (*jj < rstart || *jj >= rend) olen++;
3823         else dlen++;
3824         jj++;
3825       }
3826       olens[i] = olen;
3827       dlens[i] = dlen;
3828     }
3829     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3830     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3831     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3832     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3833     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3834     ierr = PetscFree(dlens);CHKERRQ(ierr);
3835   } else {
3836     PetscInt ml,nl;
3837 
3838     M    = *newmat;
3839     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3840     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3841     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3842     /*
3843          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3844        rather than the slower MatSetValues().
3845     */
3846     M->was_assembled = PETSC_TRUE;
3847     M->assembled     = PETSC_FALSE;
3848   }
3849   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3850   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3851   ii   = aij->i;
3852   jj   = aij->j;
3853 
3854   /* trigger copy to CPU if needed */
3855   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  /* Insert Mreuse row by row; cwork/vwork alias the CSR slices of the current row
     while jj and aa advance across rows */
3856   for (i=0; i<m; i++) {
3857     row   = rstart + i;
3858     nz    = ii[i+1] - ii[i];
3859     cwork = jj; jj += nz;
3860     vwork = aa; aa += nz;
3861     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3862   }
3863   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3864 
3865   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   *newmat = M;
3868 
3869   /* save submatrix used in processor for next request */
3870   if (call ==  MAT_INITIAL_MATRIX) {
    /* PetscObjectCompose takes its own reference; drop the local one */
3871     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3872     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3873   }
3874   PetscFunctionReturn(0);
3875 }
3876 
3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3878 {
3879   PetscInt       m,cstart, cend,j,nnz,i,d;
3880   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3881   const PetscInt *JJ;
3882   PetscErrorCode ierr;
3883   PetscBool      nooffprocentries;
3884 
3885   PetscFunctionBegin;
3886   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3887 
3888   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3889   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3890   m      = B->rmap->n;
3891   cstart = B->cmap->rstart;
3892   cend   = B->cmap->rend;
3893   rstart = B->rmap->rstart;
3894 
3895   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3896 
3897   if (PetscDefined(USE_DEBUG)) {
3898     for (i=0; i<m; i++) {
3899       nnz = Ii[i+1]- Ii[i];
3900       JJ  = J + Ii[i];
3901       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3902       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3903       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3904     }
3905   }
3906 
3907   for (i=0; i<m; i++) {
3908     nnz     = Ii[i+1]- Ii[i];
3909     JJ      = J + Ii[i];
3910     nnz_max = PetscMax(nnz_max,nnz);
3911     d       = 0;
3912     for (j=0; j<nnz; j++) {
3913       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3914     }
3915     d_nnz[i] = d;
3916     o_nnz[i] = nnz - d;
3917   }
3918   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3919   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3920 
3921   for (i=0; i<m; i++) {
3922     ii   = i + rstart;
3923     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3924   }
3925   nooffprocentries    = B->nooffprocentries;
3926   B->nooffprocentries = PETSC_TRUE;
3927   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3928   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   B->nooffprocentries = nooffprocentries;
3930 
3931   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 /*@
3936    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3937    (the default parallel PETSc format).
3938 
3939    Collective
3940 
3941    Input Parameters:
3942 +  B - the matrix
3943 .  i - the indices into j for the start of each local row (starts with zero)
3944 .  j - the column indices for each local row (starts with zero)
3945 -  v - optional values in the matrix
3946 
3947    Level: developer
3948 
3949    Notes:
3950        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3951      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3952      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3953 
3954        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3955 
3956        The format which is used for the sparse matrix input, is equivalent to a
3957     row-major ordering, i.e., for the following matrix, the input data expected is
3958     as shown
3959 
3960 $        1 0 0
3961 $        2 0 3     P0
3962 $       -------
3963 $        4 5 6     P1
3964 $
3965 $     Process0 [P0]: rows_owned=[0,1]
3966 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3967 $        j =  {0,0,2}  [size = 3]
3968 $        v =  {1,2,3}  [size = 3]
3969 $
3970 $     Process1 [P1]: rows_owned=[2]
3971 $        i =  {0,3}    [size = nrow+1  = 1+1]
3972 $        j =  {0,1,2}  [size = 3]
3973 $        v =  {4,5,6}  [size = 3]
3974 
3975 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3976           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3977 @*/
3978 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3979 {
3980   PetscErrorCode ierr;
3981 
3982   PetscFunctionBegin;
3983   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3984   PetscFunctionReturn(0);
3985 }
3986 
3987 /*@C
3988    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3989    (the default parallel PETSc format).  For good matrix assembly performance
3990    the user should preallocate the matrix storage by setting the parameters
3991    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3992    performance can be increased by more than a factor of 50.
3993 
3994    Collective
3995 
3996    Input Parameters:
3997 +  B - the matrix
3998 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3999            (same value is used for all local rows)
4000 .  d_nnz - array containing the number of nonzeros in the various rows of the
4001            DIAGONAL portion of the local submatrix (possibly different for each row)
4002            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4003            The size of this array is equal to the number of local rows, i.e 'm'.
4004            For matrices that will be factored, you must leave room for (and set)
4005            the diagonal entry even if it is zero.
4006 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4007            submatrix (same value is used for all local rows).
4008 -  o_nnz - array containing the number of nonzeros in the various rows of the
4009            OFF-DIAGONAL portion of the local submatrix (possibly different for
4010            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4011            structure. The size of this array is equal to the number
4012            of local rows, i.e 'm'.
4013 
4014    If the *_nnz parameter is given then the *_nz parameter is ignored
4015 
4016    The AIJ format (also called the Yale sparse matrix format or
4017    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4018    storage.  The stored row and column indices begin with zero.
4019    See Users-Manual: ch_mat for details.
4020 
4021    The parallel matrix is partitioned such that the first m0 rows belong to
4022    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4023    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4024 
4025    The DIAGONAL portion of the local submatrix of a processor can be defined
4026    as the submatrix which is obtained by extraction the part corresponding to
4027    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4028    first row that belongs to the processor, r2 is the last row belonging to
4029    the this processor, and c1-c2 is range of indices of the local part of a
4030    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4031    common case of a square matrix, the row and column ranges are the same and
4032    the DIAGONAL part is also square. The remaining portion of the local
4033    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4034 
4035    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4036 
4037    You can call MatGetInfo() to get information on how effective the preallocation was;
4038    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4039    You can also run with the option -info and look for messages with the string
4040    malloc in them to see if additional memory allocation was needed.
4041 
4042    Example usage:
4043 
4044    Consider the following 8x8 matrix with 34 non-zero values, that is
4045    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4046    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4047    as follows:
4048 
4049 .vb
4050             1  2  0  |  0  3  0  |  0  4
4051     Proc0   0  5  6  |  7  0  0  |  8  0
4052             9  0 10  | 11  0  0  | 12  0
4053     -------------------------------------
4054            13  0 14  | 15 16 17  |  0  0
4055     Proc1   0 18  0  | 19 20 21  |  0  0
4056             0  0  0  | 22 23  0  | 24  0
4057     -------------------------------------
4058     Proc2  25 26 27  |  0  0 28  | 29  0
4059            30  0  0  | 31 32 33  |  0 34
4060 .ve
4061 
4062    This can be represented as a collection of submatrices as:
4063 
4064 .vb
4065       A B C
4066       D E F
4067       G H I
4068 .ve
4069 
4070    Where the submatrices A,B,C are owned by proc0, D,E,F are
4071    owned by proc1, G,H,I are owned by proc2.
4072 
4073    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4075    The 'M','N' parameters are 8,8, and have the same values on all procs.
4076 
4077    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4078    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4079    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4080    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4081    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4082    matrix, and [DF] as another SeqAIJ matrix.
4083 
4084    When d_nz, o_nz parameters are specified, d_nz storage elements are
4085    allocated for every row of the local diagonal submatrix, and o_nz
4086    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4087    One way to choose d_nz and o_nz is to use the max nonzeros per local
4088    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4089    In this case, the values of d_nz,o_nz are:
4090 .vb
4091      proc0 : dnz = 2, o_nz = 2
4092      proc1 : dnz = 3, o_nz = 2
4093      proc2 : dnz = 1, o_nz = 4
4094 .ve
4095    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4096    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4097    for proc3. i.e we are using 12+15+10=37 storage locations to store
4098    34 values.
4099 
4100    When d_nnz, o_nnz parameters are specified, the storage is specified
4101    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4102    In the above case the values for d_nnz,o_nnz are:
4103 .vb
4104      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4105      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4106      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4107 .ve
4108    Here the space allocated is sum of all the above values i.e 34, and
4109    hence pre-allocation is perfect.
4110 
4111    Level: intermediate
4112 
4113 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4114           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4115 @*/
4116 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4117 {
4118   PetscErrorCode ierr;
4119 
4120   PetscFunctionBegin;
  /* Validate B before dispatching */
4121   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4122   PetscValidType(B,1);
  /* Dispatch to the type-specific implementation registered as "MatMPIAIJSetPreallocation_C";
     PetscTryMethod() is a no-op for matrix types that do not provide the method */
4123   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4124   PetscFunctionReturn(0);
4125 }
4126 
4127 /*@
4128      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4129          CSR format for the local rows.
4130 
4131    Collective
4132 
4133    Input Parameters:
4134 +  comm - MPI communicator
4135 .  m - number of local rows (Cannot be PETSC_DECIDE)
4136 .  n - This value should be the same as the local size used in creating the
4137        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4138        calculated if N is given) For square matrices n is almost always m.
4139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4141 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4142 .   j - column indices
4143 -   a - matrix values
4144 
4145    Output Parameter:
4146 .   mat - the matrix
4147 
4148    Level: intermediate
4149 
4150    Notes:
4151        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4152      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4153      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4154 
4155        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4156 
4157        The format which is used for the sparse matrix input, is equivalent to a
4158     row-major ordering, i.e., for the following matrix, the input data expected is
4159     as shown
4160 
4161        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()
4162 
4163 $        1 0 0
4164 $        2 0 3     P0
4165 $       -------
4166 $        4 5 6     P1
4167 $
4168 $     Process0 [P0]: rows_owned=[0,1]
4169 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4170 $        j =  {0,0,2}  [size = 3]
4171 $        v =  {1,2,3}  [size = 3]
4172 $
4173 $     Process1 [P1]: rows_owned=[2]
4174 $        i =  {0,3}    [size = nrow+1  = 1+1]
4175 $        j =  {0,1,2}  [size = 3]
4176 $        v =  {4,5,6}  [size = 3]
4177 
4178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4179           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4180 @*/
4181 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4182 {
4183   PetscErrorCode ierr;
4184 
4185   PetscFunctionBegin;
4186   PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4187   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4188   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4189   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4190   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4191   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4192   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4193   PetscFunctionReturn(0);
4194 }
4195 
4196 /*@
4197      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4198          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4199 
4200    Collective
4201 
4202    Input Parameters:
4203 +  mat - the matrix
4204 .  m - number of local rows (Cannot be PETSC_DECIDE)
4205 .  n - This value should be the same as the local size used in creating the
4206        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4207        calculated if N is given) For square matrices n is almost always m.
4208 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4209 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4210 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4211 .  J - column indices
4212 -  v - matrix values
4213 
4214    Level: intermediate
4215 
4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4217           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4218 @*/
4219 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4220 {
4221   PetscErrorCode ierr;
4222   PetscInt       cstart,nnz,i,j;
4223   PetscInt       *ld;
4224   PetscBool      nooffprocentries;
4225   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4226   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4227   PetscScalar    *ad,*ao;
4228   const PetscInt *Adi = Ad->i;
4229   PetscInt       ldi,Iii,md;
4230 
4231   PetscFunctionBegin;
4232   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4233   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4234   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4235   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4236 
4237   ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4238   ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4239   cstart = mat->cmap->rstart;
4240   if (!Aij->ld) {
4241     /* count number of entries below block diagonal */
4242     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4243     Aij->ld = ld;
4244     for (i=0; i<m; i++) {
4245       nnz  = Ii[i+1]- Ii[i];
4246       j     = 0;
4247       while  (J[j] < cstart && j < nnz) {j++;}
4248       J    += nnz;
4249       ld[i] = j;
4250     }
4251   } else {
4252     ld = Aij->ld;
4253   }
4254 
4255   for (i=0; i<m; i++) {
4256     nnz  = Ii[i+1]- Ii[i];
4257     Iii  = Ii[i];
4258     ldi  = ld[i];
4259     md   = Adi[i+1]-Adi[i];
4260     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4261     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4262     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4263     ad  += md;
4264     ao  += nnz - md;
4265   }
4266   nooffprocentries      = mat->nooffprocentries;
4267   mat->nooffprocentries = PETSC_TRUE;
4268   ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4269   ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4270   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4271   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4272   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4273   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4274   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4275   mat->nooffprocentries = nooffprocentries;
4276   PetscFunctionReturn(0);
4277 }
4278 
4279 /*@C
4280    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4281    (the default parallel PETSc format).  For good matrix assembly performance
4282    the user should preallocate the matrix storage by setting the parameters
4283    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4284    performance can be increased by more than a factor of 50.
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  comm - MPI communicator
4290 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4291            This value should be the same as the local size used in creating the
4292            y vector for the matrix-vector product y = Ax.
4293 .  n - This value should be the same as the local size used in creating the
4294        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4295        calculated if N is given) For square matrices n is almost always m.
4296 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4297 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4298 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4299            (same value is used for all local rows)
4300 .  d_nnz - array containing the number of nonzeros in the various rows of the
4301            DIAGONAL portion of the local submatrix (possibly different for each row)
4302            or NULL, if d_nz is used to specify the nonzero structure.
4303            The size of this array is equal to the number of local rows, i.e 'm'.
4304 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4305            submatrix (same value is used for all local rows).
4306 -  o_nnz - array containing the number of nonzeros in the various rows of the
4307            OFF-DIAGONAL portion of the local submatrix (possibly different for
4308            each row) or NULL, if o_nz is used to specify the nonzero
4309            structure. The size of this array is equal to the number
4310            of local rows, i.e 'm'.
4311 
4312    Output Parameter:
4313 .  A - the matrix
4314 
4315    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4316    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4317    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4318 
4319    Notes:
4320    If the *_nnz parameter is given then the *_nz parameter is ignored
4321 
4322    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4323    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4324    storage requirements for this matrix.
4325 
4326    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4327    processor than it must be used on all processors that share the object for
4328    that argument.
4329 
4330    The user MUST specify either the local or global matrix dimensions
4331    (possibly both).
4332 
4333    The parallel matrix is partitioned across processors such that the
4334    first m0 rows belong to process 0, the next m1 rows belong to
4335    process 1, the next m2 rows belong to process 2 etc.. where
4336    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4337    values corresponding to [m x N] submatrix.
4338 
4339    The columns are logically partitioned with the n0 columns belonging
4340    to 0th partition, the next n1 columns belonging to the next
4341    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4342 
4343    The DIAGONAL portion of the local submatrix on any given processor
4344    is the submatrix corresponding to the rows and columns m,n
4345    corresponding to the given processor. i.e diagonal matrix on
4346    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4347    etc. The remaining portion of the local submatrix [m x (N-n)]
4348    constitute the OFF-DIAGONAL portion. The example below better
4349    illustrates this concept.
4350 
4351    For a square global matrix we define each processor's diagonal portion
4352    to be its local rows and the corresponding columns (a square submatrix);
4353    each processor's off-diagonal portion encompasses the remainder of the
4354    local matrix (a rectangular submatrix).
4355 
4356    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4357 
4358    When calling this routine with a single process communicator, a matrix of
4359    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4360    type of communicator, use the construction mechanism
4361 .vb
4362      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4363 .ve
4364 
4365 $     MatCreate(...,&A);
4366 $     MatSetType(A,MATMPIAIJ);
4367 $     MatSetSizes(A, m,n,M,N);
4368 $     MatMPIAIJSetPreallocation(A,...);
4369 
4370    By default, this format uses inodes (identical nodes) when possible.
4371    We search for consecutive rows with the same nonzero structure, thereby
4372    reusing matrix information to achieve increased efficiency.
4373 
4374    Options Database Keys:
4375 +  -mat_no_inode  - Do not use inodes
4376 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4377 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4378         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4379         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4380 
4381    Example usage:
4382 
4383    Consider the following 8x8 matrix with 34 non-zero values, that is
4384    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4385    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4386    as follows
4387 
4388 .vb
4389             1  2  0  |  0  3  0  |  0  4
4390     Proc0   0  5  6  |  7  0  0  |  8  0
4391             9  0 10  | 11  0  0  | 12  0
4392     -------------------------------------
4393            13  0 14  | 15 16 17  |  0  0
4394     Proc1   0 18  0  | 19 20 21  |  0  0
4395             0  0  0  | 22 23  0  | 24  0
4396     -------------------------------------
4397     Proc2  25 26 27  |  0  0 28  | 29  0
4398            30  0  0  | 31 32 33  |  0 34
4399 .ve
4400 
4401    This can be represented as a collection of submatrices as
4402 
4403 .vb
4404       A B C
4405       D E F
4406       G H I
4407 .ve
4408 
4409    Where the submatrices A,B,C are owned by proc0, D,E,F are
4410    owned by proc1, G,H,I are owned by proc2.
4411 
4412    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4413    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4414    The 'M','N' parameters are 8,8, and have the same values on all procs.
4415 
4416    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4417    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4418    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4419    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4422 
4423    When d_nz, o_nz parameters are specified, d_nz storage elements are
4424    allocated for every row of the local diagonal submatrix, and o_nz
4425    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4427    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4428    In this case, the values of d_nz,o_nz are
4429 .vb
4430      proc0 : dnz = 2, o_nz = 2
4431      proc1 : dnz = 3, o_nz = 2
4432      proc2 : dnz = 1, o_nz = 4
4433 .ve
4434    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4437    34 values.
4438 
4439    When d_nnz, o_nnz parameters are specified, the storage is specified
4440    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4441    In the above case the values for d_nnz,o_nnz are
4442 .vb
4443      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4444      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4445      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4446 .ve
4447    Here the space allocated is sum of all the above values i.e 34, and
4448    hence pre-allocation is perfect.
4449 
4450    Level: intermediate
4451 
4452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4453           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4454 @*/
4455 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4456 {
4457   PetscErrorCode ierr;
4458   PetscMPIInt    size;
4459 
4460   PetscFunctionBegin;
4461   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4462   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4463   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4464   if (size > 1) {
4465     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4466     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4467   } else {
4468     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4469     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4470   }
4471   PetscFunctionReturn(0);
4472 }
4473 
4474 /*@C
4475   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4476 
4477   Not collective
4478 
4479   Input Parameter:
4480 . A - The MPIAIJ matrix
4481 
4482   Output Parameters:
4483 + Ad - The local diagonal block as a SeqAIJ matrix
4484 . Ao - The local off-diagonal block as a SeqAIJ matrix
4485 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4486 
4487   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4489   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4490   local column numbers to global column numbers in the original matrix.
4491 
4492   Level: intermediate
4493 
4494 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4495 @*/
4496 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4497 {
4498   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4499   PetscBool      flg;
4500   PetscErrorCode ierr;
4501 
4502   PetscFunctionBegin;
4503   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4504   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4505   if (Ad)     *Ad     = a->A;
4506   if (Ao)     *Ao     = a->B;
4507   if (colmap) *colmap = a->garray;
4508   PetscFunctionReturn(0);
4509 }
4510 
/* Concatenates the sequential matrices inmat held by each process (stacked by rows)
   into one parallel matrix *outmat on comm; n is the local column count of *outmat
   (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the layout is computed and *outmat is
   created and preallocated; with MAT_REUSE_MATRIX only the numerical values are reinserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* prefix sum of the local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for exact preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
    /* both preallocation calls are made; only the one matching the actual type takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4565 
4566 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4567 {
4568   PetscErrorCode    ierr;
4569   PetscMPIInt       rank;
4570   PetscInt          m,N,i,rstart,nnz;
4571   size_t            len;
4572   const PetscInt    *indx;
4573   PetscViewer       out;
4574   char              *name;
4575   Mat               B;
4576   const PetscScalar *values;
4577 
4578   PetscFunctionBegin;
4579   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4580   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4581   /* Should this be the type of the diagonal block of A? */
4582   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4583   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4584   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4585   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4586   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4587   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4588   for (i=0; i<m; i++) {
4589     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4590     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4591     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4592   }
4593   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595 
4596   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4597   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4598   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4599   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4600   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4601   ierr = PetscFree(name);CHKERRQ(ierr);
4602   ierr = MatView(B,out);CHKERRQ(ierr);
4603   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4604   ierr = MatDestroy(&B);CHKERRQ(ierr);
4605   PetscFunctionReturn(0);
4606 }
4607 
/* Container destructor for the Mat_Merge_SeqsToMPI state attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); releases all of its buffers. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->bi);CHKERRQ(ierr);
  ierr = PetscFree(merge->bj);CHKERRQ(ierr);
  /* buf_ri/buf_rj are arrays of pointers whose payload is one contiguous
     allocation anchored at element 0, so free [0] first, then the array */
  ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
  ierr = PetscFree(merge->coi);CHKERRQ(ierr);
  ierr = PetscFree(merge->coj);CHKERRQ(ierr);
  ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscFree(merge);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4631 
4632 #include <../src/mat/utils/freespace.h>
4633 #include <petscbt.h>
4634 
/* Numeric phase of merging per-process sequential matrices into the parallel
   matrix mpimat whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic():
   each process sends the values of its non-owned rows of seqmat to the owning
   process, then every process sums its local values with the received ones and
   inserts the result into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the communication plan stashed on mpimat by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr);
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values for all rows owned by [proc] are contiguous in aa starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m    = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both bj_i and aj are sorted, so a single merge pass matches the columns */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4755 
4756 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4757 {
4758   PetscErrorCode      ierr;
4759   Mat                 B_mpi;
4760   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4761   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4762   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4763   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4764   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4765   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4766   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4767   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4768   MPI_Status          *status;
4769   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4770   PetscBT             lnkbt;
4771   Mat_Merge_SeqsToMPI *merge;
4772   PetscContainer      container;
4773 
4774   PetscFunctionBegin;
4775   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4776 
4777   /* make sure it is a PETSc comm */
4778   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4779   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4780   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4781 
4782   ierr = PetscNew(&merge);CHKERRQ(ierr);
4783   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4784 
4785   /* determine row ownership */
4786   /*---------------------------------------------------------*/
4787   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4788   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4789   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4790   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4791   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4792   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4793   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4794 
4795   m      = merge->rowmap->n;
4796   owners = merge->rowmap->range;
4797 
4798   /* determine the number of messages to send, their lengths */
4799   /*---------------------------------------------------------*/
4800   len_s = merge->len_s;
4801 
4802   len          = 0; /* length of buf_si[] */
4803   merge->nsend = 0;
4804   for (proc=0; proc<size; proc++) {
4805     len_si[proc] = 0;
4806     if (proc == rank) {
4807       len_s[proc] = 0;
4808     } else {
4809       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4810       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4811     }
4812     if (len_s[proc]) {
4813       merge->nsend++;
4814       nrows = 0;
4815       for (i=owners[proc]; i<owners[proc+1]; i++) {
4816         if (ai[i+1] > ai[i]) nrows++;
4817       }
4818       len_si[proc] = 2*(nrows+1);
4819       len         += len_si[proc];
4820     }
4821   }
4822 
4823   /* determine the number and length of messages to receive for ij-structure */
4824   /*-------------------------------------------------------------------------*/
4825   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4826   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4827 
4828   /* post the Irecv of j-structure */
4829   /*-------------------------------*/
4830   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4831   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4832 
4833   /* post the Isend of j-structure */
4834   /*--------------------------------*/
4835   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4836 
4837   for (proc=0, k=0; proc<size; proc++) {
4838     if (!len_s[proc]) continue;
4839     i    = owners[proc];
4840     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4841     k++;
4842   }
4843 
4844   /* receives and sends of j-structure are complete */
4845   /*------------------------------------------------*/
4846   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4847   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4848 
4849   /* send and recv i-structure */
4850   /*---------------------------*/
4851   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4852   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4853 
4854   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4855   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4856   for (proc=0,k=0; proc<size; proc++) {
4857     if (!len_s[proc]) continue;
4858     /* form outgoing message for i-structure:
4859          buf_si[0]:                 nrows to be sent
4860                [1:nrows]:           row index (global)
4861                [nrows+1:2*nrows+1]: i-structure index
4862     */
4863     /*-------------------------------------------*/
4864     nrows       = len_si[proc]/2 - 1;
4865     buf_si_i    = buf_si + nrows+1;
4866     buf_si[0]   = nrows;
4867     buf_si_i[0] = 0;
4868     nrows       = 0;
4869     for (i=owners[proc]; i<owners[proc+1]; i++) {
4870       anzi = ai[i+1] - ai[i];
4871       if (anzi) {
4872         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4873         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4874         nrows++;
4875       }
4876     }
4877     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4878     k++;
4879     buf_si += len_si[proc];
4880   }
4881 
4882   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4883   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4884 
4885   ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4886   for (i=0; i<merge->nrecv; i++) {
4887     ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4888   }
4889 
4890   ierr = PetscFree(len_si);CHKERRQ(ierr);
4891   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4892   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4893   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4894   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4895   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4896   ierr = PetscFree(status);CHKERRQ(ierr);
4897 
4898   /* compute a local seq matrix in each processor */
4899   /*----------------------------------------------*/
4900   /* allocate bi array and free space for accumulating nonzero column info */
4901   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4902   bi[0] = 0;
4903 
4904   /* create and initialize a linked list */
4905   nlnk = N+1;
4906   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907 
4908   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4909   len  = ai[owners[rank+1]] - ai[owners[rank]];
4910   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4911 
4912   current_space = free_space;
4913 
4914   /* determine symbolic info for each local row */
4915   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4916 
4917   for (k=0; k<merge->nrecv; k++) {
4918     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4919     nrows       = *buf_ri_k[k];
4920     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4921     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4922   }
4923 
4924   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4925   len  = 0;
4926   for (i=0; i<m; i++) {
4927     bnzi = 0;
4928     /* add local non-zero cols of this proc's seqmat into lnk */
4929     arow  = owners[rank] + i;
4930     anzi  = ai[arow+1] - ai[arow];
4931     aj    = a->j + ai[arow];
4932     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4933     bnzi += nlnk;
4934     /* add received col data into lnk */
4935     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4936       if (i == *nextrow[k]) { /* i-th row */
4937         anzi  = *(nextai[k]+1) - *nextai[k];
4938         aj    = buf_rj[k] + *nextai[k];
4939         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4940         bnzi += nlnk;
4941         nextrow[k]++; nextai[k]++;
4942       }
4943     }
4944     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4945 
4946     /* if free space is not available, make more free space */
4947     if (current_space->local_remaining<bnzi) {
4948       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4949       nspacedouble++;
4950     }
4951     /* copy data into free space, then initialize lnk */
4952     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4953     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4954 
4955     current_space->array           += bnzi;
4956     current_space->local_used      += bnzi;
4957     current_space->local_remaining -= bnzi;
4958 
4959     bi[i+1] = bi[i] + bnzi;
4960   }
4961 
4962   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4963 
4964   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4965   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4966   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4967 
4968   /* create symbolic parallel matrix B_mpi */
4969   /*---------------------------------------*/
4970   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4971   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4972   if (n==PETSC_DECIDE) {
4973     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4974   } else {
4975     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   }
4977   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4978   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4979   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4980   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4981   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4982 
4983   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4984   B_mpi->assembled  = PETSC_FALSE;
4985   merge->bi         = bi;
4986   merge->bj         = bj;
4987   merge->buf_ri     = buf_ri;
4988   merge->buf_rj     = buf_rj;
4989   merge->coi        = NULL;
4990   merge->coj        = NULL;
4991   merge->owners_co  = NULL;
4992 
4993   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4994 
4995   /* attach the supporting struct to B_mpi for reuse */
4996   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4997   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4998   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4999   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5000   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5001   *mpimat = B_mpi;
5002 
5003   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*@C
5008       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5009                  matrices from each processor
5010 
5011     Collective
5012 
5013    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
5016 .    m - number of local rows (or PETSC_DECIDE)
5017 .    n - number of local columns (or PETSC_DECIDE)
5018 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 
5020    Output Parameter:
5021 .    mpimat - the parallel matrix generated
5022 
5023     Level: advanced
5024 
5025    Notes:
5026      The dimensions of the sequential matrix in each processor MUST be the same.
5027      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5028      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5029 @*/
5030 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5031 {
5032   PetscErrorCode ierr;
5033   PetscMPIInt    size;
5034 
5035   PetscFunctionBegin;
5036   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5037   if (size == 1) {
5038     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039     if (scall == MAT_INITIAL_MATRIX) {
5040       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5041     } else {
5042       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5043     }
5044     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5045     PetscFunctionReturn(0);
5046   }
5047   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5048   if (scall == MAT_INITIAL_MATRIX) {
5049     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5050   }
5051   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5052   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
5056 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
5060 
5061     Not Collective
5062 
5063    Input Parameters:
5064 +    A - the matrix
5065 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 
5067    Output Parameter:
5068 .    A_loc - the local sequential matrix generated
5069 
5070     Level: developer
5071 
5072    Notes:
5073      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5074      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5075      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5076      modify the values of the returned A_loc.
5077 
5078 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5079 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  /* cmap maps local column indices of the off-diagonal block B to global column indices */
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and any subclass whose type name begins with "mpiaij" */
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocessor: the diagonal block already is the whole local matrix; reference or copy it */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  /* aa/ba are cursors advanced through the value arrays; aav/bav stay put for the Restore calls */
  aa   = aav;
  ba   = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: each row holds its diag plus off-diag entries */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal columns that lie left of the diagonal block (global index < cstart) */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local indices by cstart to get global column numbers */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal columns, right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refresh the values, walking rows in the same order as above */
    mat  =(Mat_SeqAIJ*)(*A_loc)->data;
    ci   = mat->i;
    cj   = mat->j;
    ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
    for (i=0; i<am; i++) {
      /* off-diagonal columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal columns */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5181 
5182 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5185 
5186     Not Collective
5187 
5188    Input Parameters:
5189 +    A - the matrix
5190 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 
5192    Output Parameters:
5193 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5194 -    A_loc - the local sequential matrix generated
5195 
5196     Level: developer
5197 
5198    Notes:
5199      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5200 
5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5202 
5203 @*/
5204 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5205 {
5206   PetscErrorCode ierr;
5207   Mat            Ao,Ad;
5208   const PetscInt *cmap;
5209   PetscMPIInt    size;
5210   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5211 
5212   PetscFunctionBegin;
5213   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5214   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5215   if (size == 1) {
5216     if (scall == MAT_INITIAL_MATRIX) {
5217       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5218       *A_loc = Ad;
5219     } else if (scall == MAT_REUSE_MATRIX) {
5220       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5221     }
5222     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5223     PetscFunctionReturn(0);
5224   }
5225   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5226   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5227   if (f) {
5228     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5229   } else {
5230     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5231     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5232     Mat_SeqAIJ        *c;
5233     PetscInt          *ai = a->i, *aj = a->j;
5234     PetscInt          *bi = b->i, *bj = b->j;
5235     PetscInt          *ci,*cj;
5236     const PetscScalar *aa,*ba;
5237     PetscScalar       *ca;
5238     PetscInt          i,j,am,dn,on;
5239 
5240     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5241     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5242     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5243     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5244     if (scall == MAT_INITIAL_MATRIX) {
5245       PetscInt k;
5246       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5247       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5248       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5249       ci[0] = 0;
5250       for (i=0,k=0; i<am; i++) {
5251         const PetscInt ncols_o = bi[i+1] - bi[i];
5252         const PetscInt ncols_d = ai[i+1] - ai[i];
5253         ci[i+1] = ci[i] + ncols_o + ncols_d;
5254         /* diagonal portion of A */
5255         for (j=0; j<ncols_d; j++,k++) {
5256           cj[k] = *aj++;
5257           ca[k] = *aa++;
5258         }
5259         /* off-diagonal portion of A */
5260         for (j=0; j<ncols_o; j++,k++) {
5261           cj[k] = dn + *bj++;
5262           ca[k] = *ba++;
5263         }
5264       }
5265       /* put together the new matrix */
5266       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5267       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5268       /* Since these are PETSc arrays, change flags to free them as necessary. */
5269       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5270       c->free_a  = PETSC_TRUE;
5271       c->free_ij = PETSC_TRUE;
5272       c->nonew   = 0;
5273       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5274     } else if (scall == MAT_REUSE_MATRIX) {
5275       ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5276       for (i=0; i<am; i++) {
5277         const PetscInt ncols_d = ai[i+1] - ai[i];
5278         const PetscInt ncols_o = bi[i+1] - bi[i];
5279         /* diagonal portion of A */
5280         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5281         /* off-diagonal portion of A */
5282         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5283       }
5284       ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5285     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5286     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5287     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5288     if (glob) {
5289       PetscInt cst, *gidx;
5290 
5291       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5292       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5293       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5294       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5295       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5296     }
5297   }
5298   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5299   PetscFunctionReturn(0);
5300 }
5301 
5302 /*@C
5303      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5304 
5305     Not Collective
5306 
5307    Input Parameters:
5308 +    A - the matrix
5309 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5310 -    row, col - index sets of rows and columns to extract (or NULL)
5311 
5312    Output Parameter:
5313 .    A_loc - the local sequential matrix generated
5314 
5315     Level: developer
5316 
5317 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5318 
5319 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns of A on this process, in ascending global order:
       off-diagonal columns left of the owned range, then all owned columns, then the rest */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;  /* first off-diagonal column at or beyond the owned range */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array holding the matrix to be reused */
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* only destroy the index sets created here, never caller-supplied ones */
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5376 
5377 /*
5378  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5379  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5380  * on a global size.
5381  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* root-side arrays: per-row counts and prefix offsets, interleaved as (diag,offdiag) pairs */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* prefix-sum offsets give each row's relative location within the owner's value arrays */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    /* total nonzeros per requested row, and the widest row (used as the column bound below) */
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (in place; undone after the broadcast begins) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* restore po->j to local indices; every global index must map back (IS_GTOLM_DROP drops none) */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5551 
5552 /*
5553  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5554  * This supports MPIAIJ and MAIJ
5555  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;       /* hash map used to deduplicate off-diagonal column keys of A */
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;     /* star forests previously composed on *P_oth, used to refresh values */
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof; /* collapse the global column index by the dof block size (MAIJ support) */
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same value the previous step; relies on a->garray being
           sorted: a repeated key must be the one inserted in the previous iteration */
        mapping[i] = count-1;
      }
    }
    /* map: off-diagonal column of A -> local row of *P_oth; composed on *P_oth below for later lookups */
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    /* Extract the unique keys: these are the (sorted) global rows of P we need to fetch */
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case, the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place: one SF scatters values of P's diagonal block, the other
       the off-diagonal block, both into p_oth->a */
    ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5630 
5631 /*@C
5632   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5633 
5634   Collective on Mat
5635 
5636   Input Parameters:
5637 + A - the first matrix in mpiaij format
5638 . B - the second matrix in mpiaij format
5639 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5640 
5641   Output Parameters:
5642 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5643 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5644 - B_seq - the sequential matrix generated
5645 
5646   Level: developer
5647 
5648 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL; /* MatCreateSubMatrices() takes/returns an array of Mat */

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B to be well defined */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;     /* sorted global column indices of A's off-diagonal block */
    nzA   = a->A->cmap->n; /* number of columns of A's diagonal block */
    nzB   = a->B->cmap->n; /* number of nonzero columns of A's off-diagonal block */
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    /* Build the sorted list of global rows of B to extract: off-diagonal columns below
       the local range, then the local rows themselves, then the remaining columns */
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diagonal column at or beyond the local row range */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); /* all columns of B */
  } else {
    /* Reuse: the caller must hand back the index sets and matrix from the initial call */
    PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Hand the index sets back to the caller for reuse, or destroy them if not requested */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5701 
5702 /*
5703     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5704     of the OFF-DIAGONAL portion of local A
5705 
5706     Collective on Mat
5707 
5708    Input Parameters:
5709 +    A,B - the matrices in mpiaij format
5710 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5711 
   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5720 
5721     Level: developer
5722 
5723 */
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx;             /* A's mat-vec scatter; reused here for its communication pattern */
  MPI_Comm               comm;
  const PetscMPIInt      *rprocs,*sprocs; /* ranks we receive from / send to */
  const PetscInt         *srow,*rstarts,*sstarts;
  PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt            size,tag,rank,nreqs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* A's column layout must match B's row layout */
  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  if (size == 1) {
    /* Sequential case: A has no off-diagonal block, so nothing needs to be fetched.
       NOTE(review): the next two statements assign the local parameter variables, not
       *startsj_s / *bufa_ptr; the caller's output pointers are left untouched here —
       confirm callers never read those outputs when size==1 */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
  ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
  rwaits = reqs;          /* first nrecvs requests are the receives ... */
  swaits = reqs + nrecvs; /* ... followed by the nsends sends */

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; /* no saved state => cannot reuse */
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      /* NOTE(review): this offset uses rstarts[i]*rbs while the unpack loop below uses
         (rstarts[i]-rstarts[0])*rbs, and the allocation above subtracts rstarts[0];
         these only agree if rstarts[0]==0 — verify the ordered remote guarantees that */
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
    }

    /* pack the outgoing message: for each row we send, its number of nonzeros */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); /* len += rowlen[j], with overflow check */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l]; /* global column indices of this row */
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Reuse: recover the communication layout saved by the initial call */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    ierr     = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
  /* NOTE(review): the error message above does not match the condition (an unsupported
     MatReuse value) — it appears copied from elsewhere; verify intended wording */

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
  }

  /* pack the outgoing message a-array (numerical values, same layout as the j-array) */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
  ierr = PetscFree(reqs);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      /* NOTE(review): this frees the caller-provided pointer location (or a NULL no-op),
         while the bufa allocated above appears to leak in this branch — verify;
         PetscFree(bufa) looks like the intended call */
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      /* Hand ownership of the layout arrays and send buffer to the caller for reuse */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5932 
5933 /*@C
5934   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5935 
5936   Not Collective
5937 
5938   Input Parameter:
5939 . A - The matrix in mpiaij format
5940 
5941   Output Parameters:
5942 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5943 . colmap - A map from global column index to local index into lvec
5944 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5945 
5946   Level: developer
5947 
5948 @*/
5949 #if defined(PETSC_USE_CTABLE)
5950 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5951 #else
5952 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5953 #endif
5954 {
5955   Mat_MPIAIJ *a;
5956 
5957   PetscFunctionBegin;
5958   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5959   PetscValidPointer(lvec, 2);
5960   PetscValidPointer(colmap, 3);
5961   PetscValidPointer(multScatter, 4);
5962   a = (Mat_MPIAIJ*) A->data;
5963   if (lvec) *lvec = a->lvec;
5964   if (colmap) *colmap = a->colmap;
5965   if (multScatter) *multScatter = a->Mvctx;
5966   PetscFunctionReturn(0);
5967 }
5968 
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5970 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5972 #if defined(PETSC_HAVE_MKL_SPARSE)
5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5974 #endif
5975 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5976 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5977 #if defined(PETSC_HAVE_ELEMENTAL)
5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5979 #endif
5980 #if defined(PETSC_HAVE_SCALAPACK)
5981 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5982 #endif
5983 #if defined(PETSC_HAVE_HYPRE)
5984 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5985 #endif
5986 #if defined(PETSC_HAVE_CUDA)
5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5988 #endif
5989 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5991 #endif
5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5993 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5994 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5995 
5996 /*
5997     Computes (B'*A')' since computing B*A directly is untenable
5998 
5999                n                       p                          p
6000         [             ]       [             ]         [                 ]
6001       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6002         [             ]       [             ]         [                 ]
6003 
6004 */
6005 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
6006 {
6007   PetscErrorCode ierr;
6008   Mat            At,Bt,Ct;
6009 
6010   PetscFunctionBegin;
6011   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
6012   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
6013   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
6014   ierr = MatDestroy(&At);CHKERRQ(ierr);
6015   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
6016   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
6017   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6018   PetscFunctionReturn(0);
6019 }
6020 
6021 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6022 {
6023   PetscErrorCode ierr;
6024   PetscBool      cisdense;
6025 
6026   PetscFunctionBegin;
6027   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6028   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6029   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6030   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6031   if (!cisdense) {
6032     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6033   }
6034   ierr = MatSetUp(C);CHKERRQ(ierr);
6035 
6036   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6037   PetscFunctionReturn(0);
6038 }
6039 
6040 /* ----------------------------------------------------------------*/
6041 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6042 {
6043   Mat_Product *product = C->product;
6044   Mat         A = product->A,B=product->B;
6045 
6046   PetscFunctionBegin;
6047   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6048     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6049 
6050   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6051   C->ops->productsymbolic = MatProductSymbolic_AB;
6052   PetscFunctionReturn(0);
6053 }
6054 
6055 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6056 {
6057   PetscErrorCode ierr;
6058   Mat_Product    *product = C->product;
6059 
6060   PetscFunctionBegin;
6061   if (product->type == MATPRODUCT_AB) {
6062     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6063   }
6064   PetscFunctionReturn(0);
6065 }
6066 
6067 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
6068    is greater than value, or last if there is no such element.
6069 */
6070 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
6071 {
6072   PetscCount  it,step,count = last - first;
6073 
6074   PetscFunctionBegin;
6075   while (count > 0) {
6076     it   = first;
6077     step = count / 2;
6078     it  += step;
6079     if (!(value < array[it])) {
6080       first  = ++it;
6081       count -= step + 1;
6082     } else count = step;
6083   }
6084   *upper = first;
6085   PetscFunctionReturn(0);
6086 }
6087 
6088 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
6089 
6090   Input Parameters:
6091 
6092     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6093     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6094 
6095     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
6096 
6097     For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6101 
6102     Similar for Set2.
6103 
6104     This routine merges the two sets of nonzeros row by row and removes repeats.
6105 
6106   Output Parameters: (memories are allocated by the caller)
6107 
6108     i[],j[]: the CSR of the merged matrix, which has m rows.
6109     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6110     imap2[]: similar to imap1[], but for Set2.
6111     Note we order nonzeros row-by-row and from left to right.
6112 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
  const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
  PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscErrorCode ierr;
  PetscInt       r,m; /* Row index of mat */
  PetscCount     t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1   = rowBegin1[r];
    e1   = rowEnd1[r];
    b2   = rowBegin2[r];
    e2   = rowEnd2[r];
    /* Standard two-pointer merge of the two sorted ranges; advancing by jmap deltas
       skips over repeated entries so each unique column is emitted once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique nonzero of Set1 */
        b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique nonzero of Set2 */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero exists only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1       += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else {                      /* Nonzero exists only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2       += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1       += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2       += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}
6167 
6168 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6169 
6170   Input Parameters:
6171     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6172     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6173       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6174 
6175       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6176       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6177 
6178   Output Parameters:
6179     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6180     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6181       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6182       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6183 
6184     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6185       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6186         repeats (i.e., same 'i,j' pair).
6187       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6188         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6189 
6190       Atot: number of entries belonging to the diagonal block
6191       Annz: number of unique nonzeros belonging to the diagonal block.
6192 
6193     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6194 
6195     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order.
6196 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
  PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
  PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
  PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscErrorCode    ierr;
  PetscInt          cstart,cend,rstart,rend,row,col;
  PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount        k,m,p,q,r,s,mid;
  PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
  m    = rend - rstart; /* number of local rows */

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      /* The assert below grounds 0 <= j[p] <= N; assumes N < PETSC_MAX_INT so shifted
         values stay negative and cannot collide with real offdiag indices -- TODO confirm */
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr);
    ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart]   = mid;
    rowEnd[row-rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p];
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr);

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters restart for the second pass */
  for (r=0; r<m; r++) {
    k     = rowBegin[r];
    mid   = rowMid[r];
    s     = rowEnd[r];
    ierr  = PetscArraycpy(Aperm+Atot,perm+k,  mid-k);CHKERRQ(ierr);
    ierr  = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr);
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q); /* (p-q) repeats of this unique diag nonzero */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q   = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); /* (p-q) repeats of this unique offdiag nonzero */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6294 
6295 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6296 {
6297   PetscErrorCode            ierr;
6298   MPI_Comm                  comm;
6299   PetscMPIInt               rank,size;
6300   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6301   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6302   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6303 
6304   PetscFunctionBegin;
6305   ierr = PetscFree(mpiaij->garray);CHKERRQ(ierr);
6306   ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr);
6307 #if defined(PETSC_USE_CTABLE)
6308   ierr = PetscTableDestroy(&mpiaij->colmap);CHKERRQ(ierr);
6309 #else
6310   ierr = PetscFree(mpiaij->colmap);CHKERRQ(ierr);
6311 #endif
6312   ierr = VecScatterDestroy(&mpiaij->Mvctx);CHKERRQ(ierr);
6313   mat->assembled = PETSC_FALSE;
6314   mat->was_assembled = PETSC_FALSE;
6315   ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr);
6316 
6317   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
6318   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
6319   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
6320   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
6321   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
6322   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6323   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6324   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
6325   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
6326 
6327   /* ---------------------------------------------------------------------------*/
6328   /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */
6329   /* entries come first, then local rows, then remote rows.                     */
6330   /* ---------------------------------------------------------------------------*/
6331   PetscCount n1 = coo_n,*perm1;
6332   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6333   ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr);
6334   ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */
6335   ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr);
6336   for (k=0; k<n1; k++) perm1[k] = k;
6337 
6338   /* Manipulate indices so that entries with negative row or col indices will have smallest
6339      row indices, local entries will have greater but negative row indices, and remote entries
6340      will have positive row indices.
6341   */
6342   for (k=0; k<n1; k++) {
6343     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6344     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6345     else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6346     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6347   }
6348 
6349   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6350   ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr);
6351   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6352   ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */
6353   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6354 
6355   /* ---------------------------------------------------------------------------*/
6356   /*           Split local rows into diag/offdiag portions                      */
6357   /* ---------------------------------------------------------------------------*/
6358   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6359   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6360   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6361 
6362   ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr);
6363   ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr);
6364   ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr);
6365 
6366   /* ---------------------------------------------------------------------------*/
6367   /*           Send remote rows to their owner                                  */
6368   /* ---------------------------------------------------------------------------*/
6369   /* Find which rows should be sent to which remote ranks*/
6370   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6371   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6372   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6373   const PetscInt *ranges;
6374   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6375 
6376   ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr);
6377   ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr);
6378   for (k=rem; k<n1;) {
6379     PetscMPIInt  owner;
6380     PetscInt     firstRow,lastRow;
6381 
6382     /* Locate a row range */
6383     firstRow = i1[k]; /* first row of this owner */
6384     ierr     = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr);
6385     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6386 
6387     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6388     ierr = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr);
6389 
6390     /* All entries in [k,p) belong to this remote owner */
6391     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6392       PetscMPIInt *sendto2;
6393       PetscInt    *nentries2;
6394       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6395 
6396       ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr);
6397       ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr);
6398       ierr = PetscArraycpy(nentries2,nentries2,maxNsend+1);CHKERRQ(ierr);
6399       ierr = PetscFree2(sendto,nentries2);CHKERRQ(ierr);
6400       sendto      = sendto2;
6401       nentries    = nentries2;
6402       maxNsend    = maxNsend2;
6403     }
6404     sendto[nsend]   = owner;
6405     nentries[nsend] = p - k;
6406     ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr);
6407     nsend++;
6408     k = p;
6409   }
6410 
6411   /* Build 1st SF to know offsets on remote to send data */
6412   PetscSF     sf1;
6413   PetscInt    nroots = 1,nroots2 = 0;
6414   PetscInt    nleaves = nsend,nleaves2 = 0;
6415   PetscInt    *offsets;
6416   PetscSFNode *iremote;
6417 
6418   ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr);
6419   ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr);
6420   ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr);
6421   for (k=0; k<nsend; k++) {
6422     iremote[k].rank  = sendto[k];
6423     iremote[k].index = 0;
6424     nleaves2        += nentries[k];
6425     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6426   }
6427   ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6428   ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr);
6429   ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* Would nroots2 overflow, we check offsets[] below */
6430   ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);
6431   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6432 
6433   /* Build 2nd SF to send remote COOs to their owner */
6434   PetscSF sf2;
6435   nroots  = nroots2;
6436   nleaves = nleaves2;
6437   ierr    = PetscSFCreate(comm,&sf2);CHKERRQ(ierr);
6438   ierr    = PetscSFSetFromOptions(sf2);CHKERRQ(ierr);
6439   ierr    = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
6440   p       = 0;
6441   for (k=0; k<nsend; k++) {
6442     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6443     for (q=0; q<nentries[k]; q++,p++) {
6444       iremote[p].rank  = sendto[k];
6445       iremote[p].index = offsets[k] + q;
6446     }
6447   }
6448   ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6449 
6450   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */
6451   ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr);
6452 
6453   /* Send the remote COOs to their owner */
6454   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6455   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6456   ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr);
6457   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr);
6458   ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr);
6459   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr);
6460   ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr);
6461 
6462   ierr = PetscFree(offsets);CHKERRQ(ierr);
6463   ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6464 
6465   /* ---------------------------------------------------------------*/
6466   /* Sort received COOs by row along with the permutation array     */
6467   /* ---------------------------------------------------------------*/
6468   for (k=0; k<n2; k++) perm2[k] = k;
6469   ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr);
6470 
6471   /* ---------------------------------------------------------------*/
6472   /* Split received COOs into diag/offdiag portions                 */
6473   /* ---------------------------------------------------------------*/
6474   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6475   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6476   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6477 
6478   ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr);
6479   ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr);
6480 
6481   /* --------------------------------------------------------------------------*/
6482   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6483   /* --------------------------------------------------------------------------*/
6484   PetscInt   *Ai,*Bi;
6485   PetscInt   *Aj,*Bj;
6486 
6487   ierr = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr);
6488   ierr = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr);
6489   ierr = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */
6490   ierr = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr);
6491 
6492   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6493   ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr);
6494 
6495   ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr);
6496   ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr);
6497   ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr);
6498   ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr);
6499   ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr);
6500   ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr);
6501 
6502   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6503   PetscInt Annz = Ai[m];
6504   PetscInt Bnnz = Bi[m];
6505   if (Annz < Annz1 + Annz2) {
6506     PetscInt *Aj_new;
6507     ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr);
6508     ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr);
6509     ierr = PetscFree(Aj);CHKERRQ(ierr);
6510     Aj   = Aj_new;
6511   }
6512 
6513   if (Bnnz < Bnnz1 + Bnnz2) {
6514     PetscInt *Bj_new;
6515     ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr);
6516     ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr);
6517     ierr = PetscFree(Bj);CHKERRQ(ierr);
6518     Bj   = Bj_new;
6519   }
6520 
6521   /* --------------------------------------------------------------------------------*/
6522   /* Create new submatrices for on-process and off-process coupling                  */
6523   /* --------------------------------------------------------------------------------*/
6524   PetscScalar   *Aa,*Ba;
6525   MatType       rtype;
6526   Mat_SeqAIJ    *a,*b;
6527   ierr = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zero matrix on device */
6528   ierr = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr);
6529   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6530   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6531   ierr = MatDestroy(&mpiaij->A);CHKERRQ(ierr);
6532   ierr = MatDestroy(&mpiaij->B);CHKERRQ(ierr);
6533   ierr = MatGetRootType_Private(mat,&rtype);CHKERRQ(ierr);
6534   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A);CHKERRQ(ierr);
6535   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B);CHKERRQ(ierr);
6536   ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
6537 
6538   a = (Mat_SeqAIJ*)mpiaij->A->data;
6539   b = (Mat_SeqAIJ*)mpiaij->B->data;
6540   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6541   a->free_a       = b->free_a       = PETSC_TRUE;
6542   a->free_ij      = b->free_ij      = PETSC_TRUE;
6543 
6544   /* conversion must happen AFTER multiply setup */
6545   ierr = MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A);CHKERRQ(ierr);
6546   ierr = MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B);CHKERRQ(ierr);
6547   ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr);
6548   ierr = MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL);CHKERRQ(ierr);
6549   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec);CHKERRQ(ierr);
6550 
6551   mpiaij->coo_n   = coo_n;
6552   mpiaij->coo_sf  = sf2;
6553   mpiaij->sendlen = nleaves;
6554   mpiaij->recvlen = nroots;
6555 
6556   mpiaij->Annz1   = Annz1;
6557   mpiaij->Annz2   = Annz2;
6558   mpiaij->Bnnz1   = Bnnz1;
6559   mpiaij->Bnnz2   = Bnnz2;
6560 
6561   mpiaij->Atot1   = Atot1;
6562   mpiaij->Atot2   = Atot2;
6563   mpiaij->Btot1   = Btot1;
6564   mpiaij->Btot2   = Btot2;
6565 
6566   mpiaij->Aimap1  = Aimap1;
6567   mpiaij->Aimap2  = Aimap2;
6568   mpiaij->Bimap1  = Bimap1;
6569   mpiaij->Bimap2  = Bimap2;
6570 
6571   mpiaij->Ajmap1  = Ajmap1;
6572   mpiaij->Ajmap2  = Ajmap2;
6573   mpiaij->Bjmap1  = Bjmap1;
6574   mpiaij->Bjmap2  = Bjmap2;
6575 
6576   mpiaij->Aperm1  = Aperm1;
6577   mpiaij->Aperm2  = Aperm2;
6578   mpiaij->Bperm1  = Bperm1;
6579   mpiaij->Bperm2  = Bperm2;
6580 
6581   mpiaij->Cperm1  = Cperm1;
6582 
6583   /* Allocate in preallocation. If not used, it has zero cost on host */
6584   ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr);
6585   PetscFunctionReturn(0);
6586 }
6587 
6588 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6589 {
6590   PetscErrorCode       ierr;
6591   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6592   Mat                  A = mpiaij->A,B = mpiaij->B;
6593   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6594   PetscScalar          *Aa,*Ba;
6595   PetscScalar          *sendbuf = mpiaij->sendbuf;
6596   PetscScalar          *recvbuf = mpiaij->recvbuf;
6597   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6598   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6599   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6600   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6601 
6602   PetscFunctionBegin;
6603   ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */
6604   ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr);
6605   if (imode == INSERT_VALUES) {
6606     ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6607     ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6608   }
6609 
6610   /* Pack entries to be sent to remote */
6611   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6612 
6613   /* Send remote entries to their owner and overlap the communication with local computation */
6614   ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6615   /* Add local entries to A and B */
6616   for (PetscCount i=0; i<Annz1; i++) {
6617     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6618   }
6619   for (PetscCount i=0; i<Bnnz1; i++) {
6620     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6621   }
6622   ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6623 
6624   /* Add received remote entries to A and B */
6625   for (PetscCount i=0; i<Annz2; i++) {
6626     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6627   }
6628   for (PetscCount i=0; i<Bnnz2; i++) {
6629     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6630   }
6631   ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr);
6632   ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr);
6633   PetscFunctionReturn(0);
6634 }
6635 
6636 /* ----------------------------------------------------------------*/
6637 
6638 /*MC
6639    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6640 
6641    Options Database Keys:
6642 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6643 
6644    Level: beginner
6645 
6646    Notes:
6647     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6648     in this case the values associated with the rows and columns one passes in are set to zero
6649     in the matrix
6650 
6651     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6652     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6653 
6654 .seealso: MatCreateAIJ()
6655 M*/
6656 
/*
  MatCreate_MPIAIJ - Type constructor for MATMPIAIJ, invoked by MatSetType()/MatSetFromOptions().

  Allocates the Mat_MPIAIJ data structure, installs the function table, creates the stash used
  to buffer off-process MatSetValues() entries, and registers the composed-function hooks
  (preallocation, conversions to other formats, COO assembly, matrix products).
  Preallocation of the A (diag) and B (offdiag) blocks happens later, e.g. in
  MatMPIAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* Allocate the implementation struct and install the MPIAIJ method table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed functions: queried by name elsewhere (e.g. by MatConvert(), MatSetPreallocationCOO()) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6737 
6738 /*@C
6739      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6740          and "off-diagonal" part of the matrix in CSR format.
6741 
6742    Collective
6743 
6744    Input Parameters:
6745 +  comm - MPI communicator
6746 .  m - number of local rows (Cannot be PETSC_DECIDE)
6747 .  n - This value should be the same as the local size used in creating the
6748        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6749        calculated if N is given) For square matrices n is almost always m.
6750 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6751 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6752 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6753 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6754 .   a - matrix values
6755 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6756 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6757 -   oa - matrix values
6758 
6759    Output Parameter:
6760 .   mat - the matrix
6761 
6762    Level: advanced
6763 
6764    Notes:
6765        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6766        must free the arrays once the matrix has been destroyed and not before.
6767 
6768        The i and j indices are 0 based
6769 
6770        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6771 
6772        This sets local rows and cannot be used to set off-processor values.
6773 
6774        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6775        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6776        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6777        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6778        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6779        communication if it is known that only local entries will be set.
6780 
6781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6782           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6783 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* Validate input: the local row count must be explicit and both CSR row arrays must be 0-based */
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* Mark preallocated so assembly below does not complain about missing preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* Wrap the caller-owned arrays as the diag (A) and offdiag (B) sequential blocks; the arrays are NOT copied */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  /* Only local rows were provided, so assembly needs no inter-process communication */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  /* The sparsity pattern is fixed by the supplied arrays; error on inserts outside it */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6813 
/* Product data attached to C = A*B (and variants) when computed with the MPIAIJ backend path:
   holds the intermediate per-rank products and the COO machinery used to assemble their values
   into the final matrix during the numeric phase. Freed by MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the SF-allocated coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
6844 
6845 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6846 {
6847   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6848   PetscInt            i;
6849   PetscErrorCode      ierr;
6850 
6851   PetscFunctionBegin;
6852   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6853   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6854   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6855   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6856   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6857   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6858   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6859   for (i = 0; i < mmdata->cp; i++) {
6860     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6861   }
6862   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6863   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6864   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6865   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6866   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6867   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6868   PetscFunctionReturn(0);
6869 }
6870 
6871 /* Copy selected n entries with indices in idx[] of A to v[].
6872    If idx is NULL, copy the whole data array of A to v[]
6873  */
6874 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6875 {
6876   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6877   PetscErrorCode ierr;
6878 
6879   PetscFunctionBegin;
6880   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6881   if (f) {
6882     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6883   } else {
6884     const PetscScalar *vv;
6885 
6886     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6887     if (n && idx) {
6888       PetscScalar    *w = v;
6889       const PetscInt *oi = idx;
6890       PetscInt       j;
6891 
6892       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6893     } else {
6894       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6895     }
6896     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6897   }
6898   PetscFunctionReturn(0);
6899 }
6900 
/* Numeric phase of the backend MatProduct for MPIAIJ: recompute the intermediate
   sequential products, harvest their values into the COO buffers laid out by the
   symbolic phase, scatter off-process contributions, and insert everything into C
   with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only skips the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* copy values of non-temporary intermediates into the COO buffers, split by destination */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* # of values mp[i] sends to other ranks */

    if (mmdata->mptmp[i]) continue;
    if (noff) { /* mp[i] has both locally owned and off-process entries */
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else { /* all of mp[i]'s nonzeros are inserted locally; copy its full value array */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values land after the local ones in coo_v */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6950 
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend MatProduct for MPIAIJ.
   Strategy: decompose the parallel product into up to MAX_NUMBER_INTERMEDIATE
   sequential products mp[] on local blocks (diag/off-diag/merged/P_oth), record for
   each one how its local rows/columns map to global indices of C (rmapt/cmapt,
   rmapa/cmapa), then translate all nonzeros into COO coordinates — scattering
   (via a PetscSF) those that belong to other ranks — and preallocate C with
   MatSetPreallocationCOO().  The numeric phase only has to refill the values. */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                        /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                        /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* A symmetric A lets us handle A^t*B as A*B (cheaper: no off-process insertion) */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
    ptype = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine the sizes of C and whether values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A = product->A;
    P = product->B;
    m = A->rmap->n;
    n = P->cmap->n;
    M = A->rmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P = product->A;
    A = product->B;
    m = P->cmap->n;
    n = A->cmap->n;
    M = P->cmap->N;
    N = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A = product->A;
    P = product->B;
    m = P->cmap->n;
    n = P->cmap->n;
    M = P->cmap->N;
    N = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  ierr = PetscNew(&mmdata);CHKERRQ(ierr);
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
  ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);

  /* create the intermediate sequential products; cp counts them.  Each gets a
     distinct "backend_p<cp>_" options prefix, and its row/col maps to C's global
     numbering are recorded in rmapt/rmapa and cmapt/cmapa */
  cp   = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
      ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diag columns of P are mapped through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t correspond to P's off-diag global columns */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    /* P is product->B */
    ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
    ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
    ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
    ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
    ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
    ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
    mp[cp]->product->api_user = product->api_user;
    ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
    PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
    ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
      ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
      ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
      ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      mptmp[cp] = PETSC_TRUE; /* A_off*P_oth only feeds the next product; never inserted into C directly */
      cp++;
      ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
      ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
      ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
      ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
      ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
      ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
      mp[cp]->product->api_user = product->api_user;
      ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii  = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
  ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
    ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
    ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii  = mm->i;
        PetscInt       *coi = coo_i + ncoo_o;
        PetscInt       *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      /* csr-like closure: off/own[cp+1] marks the end of mp[cp]'s segment */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
    ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
    ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
    ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);

    /* still create an (empty) SF so PetscSFMalloc/PetscSFFree have a valid object */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
  }
  mmdata->hasoffproc = hasoffproc;

   /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi = coo_i + ncoo_d;
    PetscInt       *coj = coo_j + ncoo_d;
    const PetscInt *jj  = mm->j;
    const PetscInt *ii  = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&glob);CHKERRQ(ierr);
  if (P_oth_l2g) {
    ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
  }
  ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);

  /* preallocate with COO data */
  ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
  ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
7459 
7460 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7461 {
7462   Mat_Product    *product = mat->product;
7463   PetscErrorCode ierr;
7464 #if defined(PETSC_HAVE_DEVICE)
7465   PetscBool      match = PETSC_FALSE;
7466   PetscBool      usecpu = PETSC_FALSE;
7467 #else
7468   PetscBool      match = PETSC_TRUE;
7469 #endif
7470 
7471   PetscFunctionBegin;
7472   MatCheckProduct(mat,1);
7473 #if defined(PETSC_HAVE_DEVICE)
7474   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7475     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7476   }
7477   if (match) { /* we can always fallback to the CPU if requested */
7478     switch (product->type) {
7479     case MATPRODUCT_AB:
7480       if (product->api_user) {
7481         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7482         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7483         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7484       } else {
7485         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7486         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7487         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7488       }
7489       break;
7490     case MATPRODUCT_AtB:
7491       if (product->api_user) {
7492         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7493         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7494         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7495       } else {
7496         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7497         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7498         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7499       }
7500       break;
7501     case MATPRODUCT_PtAP:
7502       if (product->api_user) {
7503         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7504         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7505         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7506       } else {
7507         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7508         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7509         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7510       }
7511       break;
7512     default:
7513       break;
7514     }
7515     match = (PetscBool)!usecpu;
7516   }
7517 #endif
7518   if (match) {
7519     switch (product->type) {
7520     case MATPRODUCT_AB:
7521     case MATPRODUCT_AtB:
7522     case MATPRODUCT_PtAP:
7523       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7524       break;
7525     default:
7526       break;
7527     }
7528   }
7529   /* fallback to MPIAIJ ops */
7530   if (!mat->ops->productsymbolic) {
7531     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7532   }
7533   PetscFunctionReturn(0);
7534 }
7535 
7536 /*
7537     Special version for direct calls from Fortran
7538 */
7539 #include <petsc/private/fortranimpl.h>
7540 
7541 /* Change these macros so can be used in void function */
7542 /* Identical to CHKERRV, except it assigns to *_ierr */
7543 #undef CHKERRQ
7544 #define CHKERRQ(ierr) do {                                                                     \
7545     PetscErrorCode ierr_msv_mpiaij = (ierr);                                                   \
7546     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7547       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7548       return;                                                                                  \
7549     }                                                                                          \
7550   } while (0)
7551 
7552 #undef SETERRQ
7553 #define SETERRQ(comm,ierr,...) do {                                                            \
7554     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7555     return;                                                                                    \
7556   } while (0)
7557 
7558 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7559 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7560 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7561 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7562 #else
7563 #endif
7564 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7565 {
7566   Mat            mat  = *mmat;
7567   PetscInt       m    = *mm, n = *mn;
7568   InsertMode     addv = *maddv;
7569   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
7570   PetscScalar    value;
7571   PetscErrorCode ierr;
7572 
7573   MatCheckPreallocated(mat,1);
7574   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7575   else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7576   {
7577     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7578     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7579     PetscBool roworiented = aij->roworiented;
7580 
7581     /* Some Variables required in the macro */
7582     Mat        A                    = aij->A;
7583     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7584     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7585     MatScalar  *aa;
7586     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7587     Mat        B                    = aij->B;
7588     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7589     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7590     MatScalar  *ba;
7591     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7592      * cannot use "#if defined" inside a macro. */
7593     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7594 
7595     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7596     PetscInt  nonew = a->nonew;
7597     MatScalar *ap1,*ap2;
7598 
7599     PetscFunctionBegin;
7600     ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
7601     ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
7602     for (i=0; i<m; i++) {
7603       if (im[i] < 0) continue;
7604       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7605       if (im[i] >= rstart && im[i] < rend) {
7606         row      = im[i] - rstart;
7607         lastcol1 = -1;
7608         rp1      = aj + ai[row];
7609         ap1      = aa + ai[row];
7610         rmax1    = aimax[row];
7611         nrow1    = ailen[row];
7612         low1     = 0;
7613         high1    = nrow1;
7614         lastcol2 = -1;
7615         rp2      = bj + bi[row];
7616         ap2      = ba + bi[row];
7617         rmax2    = bimax[row];
7618         nrow2    = bilen[row];
7619         low2     = 0;
7620         high2    = nrow2;
7621 
7622         for (j=0; j<n; j++) {
7623           if (roworiented) value = v[i*n+j];
7624           else value = v[i+j*m];
7625           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7626           if (in[j] >= cstart && in[j] < cend) {
7627             col = in[j] - cstart;
7628             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7629           } else if (in[j] < 0) continue;
7630           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7631             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7632             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7633           } else {
7634             if (mat->was_assembled) {
7635               if (!aij->colmap) {
7636                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
7637               }
7638 #if defined(PETSC_USE_CTABLE)
7639               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
7640               col--;
7641 #else
7642               col = aij->colmap[in[j]] - 1;
7643 #endif
7644               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7645                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
7646                 col  =  in[j];
7647                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7648                 B        = aij->B;
7649                 b        = (Mat_SeqAIJ*)B->data;
7650                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7651                 rp2      = bj + bi[row];
7652                 ap2      = ba + bi[row];
7653                 rmax2    = bimax[row];
7654                 nrow2    = bilen[row];
7655                 low2     = 0;
7656                 high2    = nrow2;
7657                 bm       = aij->B->rmap->n;
7658                 ba       = b->a;
7659                 inserted = PETSC_FALSE;
7660               }
7661             } else col = in[j];
7662             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7663           }
7664         }
7665       } else if (!aij->donotstash) {
7666         if (roworiented) {
7667           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7668         } else {
7669           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7670         }
7671       }
7672     }
7673     ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
7674     ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
7675   }
7676   PetscFunctionReturnVoid();
7677 }
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions (they are left undefined rather than wrongly redefined). */
#undef CHKERRQ
#undef SETERRQ
7683