xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7487cd7ca1dddf3cbc146be559ee2e39856c5efc)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
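/*
   Usage sketch (illustrative only, not part of this file's code): creating a MATAIJ matrix and calling both
   preallocation routines as recommended above. The names comm, M, N and the preallocation counts
   (5 nonzeros per row in the diagonal block, 2 in the off-diagonal block) are placeholders that depend on
   the application.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/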
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
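/*
   Usage sketch (illustrative only): the type can be set directly with

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);

   or selected at runtime by calling MatSetFromOptions(A) and running with -mat_type aijcrl; the
   preallocation advice above applies in either case.
*/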
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
90 {
91   PetscErrorCode  ierr;
92   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
93   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
94   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
95   const PetscInt  *ia,*ib;
96   const MatScalar *aa,*bb,*aav,*bav;
97   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
98   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
99 
100   PetscFunctionBegin;
101   *keptrows = NULL;
102 
103   ia   = a->i;
104   ib   = b->i;
105   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
106   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
107   for (i=0; i<m; i++) {
108     na = ia[i+1] - ia[i];
109     nb = ib[i+1] - ib[i];
110     if (!na && !nb) {
111       cnt++;
112       goto ok1;
113     }
114     aa = aav + ia[i];
115     for (j=0; j<na; j++) {
116       if (aa[j] != 0.0) goto ok1;
117     }
118     bb = bav + ib[i];
119     for (j=0; j <nb; j++) {
120       if (bb[j] != 0.0) goto ok1;
121     }
122     cnt++;
123 ok1:;
124   }
125   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
126   if (!n0rows) {
127     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
128     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
129     PetscFunctionReturn(0);
130   }
131   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
132   cnt  = 0;
133   for (i=0; i<m; i++) {
134     na = ia[i+1] - ia[i];
135     nb = ib[i+1] - ib[i];
136     if (!na && !nb) continue;
137     aa = aav + ia[i];
138     for (j=0; j<na;j++) {
139       if (aa[j] != 0.0) {
140         rows[cnt++] = rstart + i;
141         goto ok2;
142       }
143     }
144     bb = bav + ib[i];
145     for (j=0; j<nb; j++) {
146       if (bb[j] != 0.0) {
147         rows[cnt++] = rstart + i;
148         goto ok2;
149       }
150     }
151 ok2:;
152   }
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
154   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
155   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
191 {
192   PetscErrorCode    ierr;
193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
194   PetscInt          i,m,n,*garray = aij->garray;
195   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
196   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
197   PetscReal         *work;
198   const PetscScalar *dummy;
199 
200   PetscFunctionBegin;
201   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
202   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
203   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
204   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
205   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
206   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
207   if (type == NORM_2) {
208     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
209       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
210     }
211     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
212       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
213     }
214   } else if (type == NORM_1) {
215     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
216       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
217     }
218     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
219       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
220     }
221   } else if (type == NORM_INFINITY) {
222     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
223       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
224     }
225     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
226       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
227     }
228   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
229     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
230       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
231     }
232     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
233       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
234     }
235   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
236     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
237       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
238     }
239     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
240       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
241     }
242   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
243   if (type == NORM_INFINITY) {
244     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
245   } else {
246     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
247   }
248   ierr = PetscFree(work);CHKERRQ(ierr);
249   if (type == NORM_2) {
250     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
251   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
252     for (i=0; i<n; i++) reductions[i] /= m;
253   }
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 /*
290   Local utility routine that creates a mapping from the global column
291 number to the local number in the off-diagonal part of the local
292 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
293 a slightly higher hash table cost; without it, it is not scalable (each process
294 has an order-N integer array) but access is fast.
295 */
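/*
   Lookup sketch (mirrors the use of colmap in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below);
   entries are stored shifted by one so that a result of zero means the global column gcol (a placeholder
   name here) is not yet a column of the off-diagonal block B:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { ... gcol is not present in B ... }
*/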
296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
297 {
298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
299   PetscErrorCode ierr;
300   PetscInt       n = aij->B->cmap->n,i;
301 
302   PetscFunctionBegin;
303   PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
304 #if defined(PETSC_USE_CTABLE)
305   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
306   for (i=0; i<n; i++) {
307     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
308   }
309 #else
310   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
311   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
312   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
313 #endif
314   PetscFunctionReturn(0);
315 }
316 
317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
318 { \
319     if (col <= lastcol1)  low1 = 0;     \
320     else                 high1 = nrow1; \
321     lastcol1 = col;\
322     while (high1-low1 > 5) { \
323       t = (low1+high1)/2; \
324       if (rp1[t] > col) high1 = t; \
325       else              low1  = t; \
326     } \
327       for (_i=low1; _i<high1; _i++) { \
328         if (rp1[_i] > col) break; \
329         if (rp1[_i] == col) { \
330           if (addv == ADD_VALUES) { \
331             ap1[_i] += value;   \
332             /* Not sure whether PetscLogFlops() will slow down the code */ \
333             (void)PetscLogFlops(1.0);   \
334            } \
335           else                    ap1[_i] = value; \
336           goto a_noinsert; \
337         } \
338       }  \
339       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
340       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
341       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
342       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
343       N = nrow1++ - 1; a->nz++; high1++; \
344       /* shift up all the later entries in this row */ \
345       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
346       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
347       rp1[_i] = col;  \
348       ap1[_i] = value;  \
349       A->nonzerostate++;\
350       a_noinsert: ; \
351       ailen[row] = nrow1; \
352 }
353 
354 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
355   { \
356     if (col <= lastcol2) low2 = 0;                        \
357     else high2 = nrow2;                                   \
358     lastcol2 = col;                                       \
359     while (high2-low2 > 5) {                              \
360       t = (low2+high2)/2;                                 \
361       if (rp2[t] > col) high2 = t;                        \
362       else             low2  = t;                         \
363     }                                                     \
364     for (_i=low2; _i<high2; _i++) {                       \
365       if (rp2[_i] > col) break;                           \
366       if (rp2[_i] == col) {                               \
367         if (addv == ADD_VALUES) {                         \
368           ap2[_i] += value;                               \
369           (void)PetscLogFlops(1.0);                       \
370         }                                                 \
371         else                    ap2[_i] = value;          \
372         goto b_noinsert;                                  \
373       }                                                   \
374     }                                                     \
375     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
376     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
377     PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
378     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
379     N = nrow2++ - 1; b->nz++; high2++;                    \
380     /* shift up all the later entries in this row */      \
381     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
382     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
383     rp2[_i] = col;                                        \
384     ap2[_i] = value;                                      \
385     B->nonzerostate++;                                    \
386     b_noinsert: ;                                         \
387     bilen[row] = nrow2;                                   \
388   }
389 
390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
391 {
392   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
393   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
394   PetscErrorCode ierr;
395   PetscInt       l,*garray = mat->garray,diag;
396   PetscScalar    *aa,*ba;
397 
398   PetscFunctionBegin;
399   /* code only works for square matrices A */
400 
401   /* find size of row to the left of the diagonal part */
402   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
403   row  = row - diag;
404   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
405     if (garray[b->j[b->i[row]+l]] > diag) break;
406   }
407   if (l) {
408     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
409     ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
410     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
411   }
412 
413   /* diagonal part */
414   if (a->i[row+1]-a->i[row]) {
415     ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
416     ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
417     ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
418   }
419 
420   /* right of diagonal part */
421   if (b->i[row+1]-b->i[row]-l) {
422     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
423     ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
424     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
430 {
431   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
432   PetscScalar    value = 0.0;
433   PetscErrorCode ierr;
434   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
435   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
436   PetscBool      roworiented = aij->roworiented;
437 
438   /* Some Variables required in the macro */
439   Mat        A                    = aij->A;
440   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
441   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
442   PetscBool  ignorezeroentries    = a->ignorezeroentries;
443   Mat        B                    = aij->B;
444   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
445   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
446   MatScalar  *aa,*ba;
447   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
448   PetscInt   nonew;
449   MatScalar  *ap1,*ap2;
450 
451   PetscFunctionBegin;
452   ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
453   ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j=0; j<n; j++) {
475         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
481         } else if (in[j] < 0) continue;
482         else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
510               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
511                 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
512               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
513             }
514           } else col = in[j];
515           nonew = b->nonew;
516           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
517         }
518       }
519     } else {
520       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
521       if (!aij->donotstash) {
522         mat->assembled = PETSC_FALSE;
523         if (roworiented) {
524           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
532   ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 /*
537     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
538     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
539     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
540 */
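/*
   Illustration (hypothetical input, two local rows, cstart=4, cend=8):

     mat_i = {0,3,5}
     mat_j = {1,5,9,  4,7}      (global column indices, sorted within each row)

   yields the diagonal part  aj = {1, 0,3}, ailen = {1,2}  (columns shifted by cstart)
   and the off-diagonal part bj = {1,9},    bilen = {2,0}  (columns kept global).
*/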
541 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
542 {
543   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
544   Mat            A           = aij->A; /* diagonal part of the matrix */
545   Mat            B           = aij->B; /* offdiagonal part of the matrix */
546   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
548   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
549   PetscInt       *ailen      = a->ilen,*aj = a->j;
550   PetscInt       *bilen      = b->ilen,*bj = b->j;
551   PetscInt       am          = aij->A->rmap->n,j;
552   PetscInt       diag_so_far = 0,dnz;
553   PetscInt       offd_so_far = 0,onz;
554 
555   PetscFunctionBegin;
556   /* Iterate over all rows of the matrix */
557   for (j=0; j<am; j++) {
558     dnz = onz = 0;
559     /*  Iterate over all non-zero columns of the current row */
560     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
561       /* If column is in the diagonal */
562       if (mat_j[col] >= cstart && mat_j[col] < cend) {
563         aj[diag_so_far++] = mat_j[col] - cstart;
564         dnz++;
565       } else { /* off-diagonal entries */
566         bj[offd_so_far++] = mat_j[col];
567         onz++;
568       }
569     }
570     ailen[j] = dnz;
571     bilen[j] = onz;
572   }
573   PetscFunctionReturn(0);
574 }
575 
576 /*
577     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
578     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
579     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
580     Also, mat->was_assembled has to be PETSC_FALSE; otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
581     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
582 */
583 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
584 {
585   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
586   Mat            A      = aij->A; /* diagonal part of the matrix */
587   Mat            B      = aij->B; /* offdiagonal part of the matrix */
588   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
589   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
590   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
591   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
592   PetscInt       *ailen = a->ilen,*aj = a->j;
593   PetscInt       *bilen = b->ilen,*bj = b->j;
594   PetscInt       am     = aij->A->rmap->n,j;
595   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
596   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
597   PetscScalar    *aa = a->a,*ba = b->a;
598 
599   PetscFunctionBegin;
600   /* Iterate over all rows of the matrix */
601   for (j=0; j<am; j++) {
602     dnz_row = onz_row = 0;
603     rowstart_offd = full_offd_i[j];
604     rowstart_diag = full_diag_i[j];
605     /*  Iterate over all non-zero columns of the current row */
606     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
607       /* If column is in the diagonal */
608       if (mat_j[col] >= cstart && mat_j[col] < cend) {
609         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
610         aa[rowstart_diag+dnz_row] = mat_a[col];
611         dnz_row++;
612       } else { /* off-diagonal entries */
613         bj[rowstart_offd+onz_row] = mat_j[col];
614         ba[rowstart_offd+onz_row] = mat_a[col];
615         onz_row++;
616       }
617     }
618     ailen[j] = dnz_row;
619     bilen[j] = onz_row;
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
625 {
626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
627   PetscErrorCode ierr;
628   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
629   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
630 
631   PetscFunctionBegin;
632   for (i=0; i<m; i++) {
633     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,idxm[i]);*/
634     PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
635     if (idxm[i] >= rstart && idxm[i] < rend) {
636       row = idxm[i] - rstart;
637       for (j=0; j<n; j++) {
638         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %" PetscInt_FMT,idxn[j]); */
639         PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
640         if (idxn[j] >= cstart && idxn[j] < cend) {
641           col  = idxn[j] - cstart;
642           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
643         } else {
644           if (!aij->colmap) {
645             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
646           }
647 #if defined(PETSC_USE_CTABLE)
648           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
649           col--;
650 #else
651           col = aij->colmap[idxn[j]] - 1;
652 #endif
653           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
654           else {
655             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
656           }
657         }
658       }
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
665 {
666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
667   PetscErrorCode ierr;
668   PetscInt       nstash,reallocs;
669 
670   PetscFunctionBegin;
671   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
672 
673   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
680 {
681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
682   PetscErrorCode ierr;
683   PetscMPIInt    n;
684   PetscInt       i,j,rstart,ncols,flg;
685   PetscInt       *row,*col;
686   PetscBool      other_disassembled;
687   PetscScalar    *val;
688 
689   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
690 
691   PetscFunctionBegin;
692   if (!aij->donotstash && !mat->nooffprocentries) {
693     while (1) {
694       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
695       if (!flg) break;
696 
697       for (i=0; i<n;) {
698         /* Now identify the consecutive vals belonging to the same row */
699         for (j=i,rstart=row[j]; j<n; j++) {
700           if (row[j] != rstart) break;
701         }
702         if (j < n) ncols = j-i;
703         else       ncols = n-i;
704         /* Now assemble all these values with a single function call */
705         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
706         i    = j;
707       }
708     }
709     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
710   }
711 #if defined(PETSC_HAVE_DEVICE)
712   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
713   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
714   if (mat->boundtocpu) {
715     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
716     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
717   }
718 #endif
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* Determine if any process has disassembled; if so, we must
723      also disassemble ourselves so that we may reassemble. */
724   /*
725      If the nonzero structure of the submatrix B cannot change then we know that
726      no process disassembled, thus we can skip this step.
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
730     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but globally it does not */
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738 #if defined(PETSC_HAVE_DEVICE)
739   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
740 #endif
741   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
742   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
743 
744   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
745 
746   aij->rowvalues = NULL;
747 
748   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
749 
750   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
751   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
752     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
753     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
754   }
755 #if defined(PETSC_HAVE_DEVICE)
756   mat->offloadmask = PETSC_OFFLOAD_BOTH;
757 #endif
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
773 {
774   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
775   PetscObjectState sA, sB;
776   PetscInt        *lrows;
777   PetscInt         r, len;
778   PetscBool        cong, lch, gch;
779   PetscErrorCode   ierr;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
784   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
803     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
824     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
834     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
835   }
836   ierr = PetscFree(lrows);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscErrorCode    ierr;
851   PetscMPIInt       n = A->rmap->n;
852   PetscInt          i,j,r,m,len = 0;
853   PetscInt          *lrows,*owners = A->rmap->range;
854   PetscMPIInt       p = 0;
855   PetscSFNode       *rrows;
856   PetscSF           sf;
857   const PetscScalar *xx;
858   PetscScalar       *bb,*mask,*aij_a;
859   Vec               xmask,lmask;
860   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
861   const PetscInt    *aj, *ii,*ridx;
862   PetscScalar       *aa;
863 
864   PetscFunctionBegin;
865   /* Create SF where leaves are input rows and roots are owned rows */
866   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
867   for (r = 0; r < n; ++r) lrows[r] = -1;
868   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
869   for (r = 0; r < N; ++r) {
870     const PetscInt idx   = rows[r];
871     PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
872     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
873       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
874     }
875     rrows[r].rank  = p;
876     rrows[r].index = rows[r] - owners[p];
877   }
878   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
879   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
880   /* Collect flags for rows to be zeroed */
881   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
882   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
883   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
884   /* Compress and put in row numbers */
885   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
886   /* zero diagonal part of matrix */
887   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
888   /* handle off diagonal part of matrix */
889   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
890   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
891   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
892   for (i=0; i<len; i++) bb[lrows[i]] = 1;
893   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
894   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
897   if (x && b) { /* this code is buggy when the row and column layouts don't match */
898     PetscBool cong;
899 
900     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
901     PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
902     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
905     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
906   }
907   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
908   /* remove zeroed rows of off diagonal matrix */
909   ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
913   }
914   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij_a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij_a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x && b) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
955   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
956   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
957   ierr = PetscFree(lrows);CHKERRQ(ierr);
958 
959   /* only change matrix nonzero state if pattern was allowed to be changed */
960   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
961     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
962     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971   PetscInt       nt;
972   VecScatter     Mvctx = a->Mvctx;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
977   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   VecScatter     Mvctx = a->Mvctx;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   /* do nondiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   /* do local part */
1017   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1018   /* add partial results together */
1019   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1025 {
1026   MPI_Comm       comm;
1027   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1028   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1029   IS             Me,Notme;
1030   PetscErrorCode ierr;
1031   PetscInt       M,N,first,last,*notme,i;
1032   PetscBool      lf;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1039   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   /* do nondiagonal part */
1082   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1083   /* do local part */
1084   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1085   /* add partial results together */
1086   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /*
1092   This only works correctly for square matrices where the subblock A->A is the
1093    diagonal block
1094 */
1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1102   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1103   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1114   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141 
1142   /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1143   ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr);
1144   ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr);
1145   ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr);
1146   ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr);
1147   ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr);
1148   ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr);
1149 
1150   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1151 
1152   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1153   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1154 
1155   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1165 #if defined(PETSC_HAVE_CUDA)
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1167 #endif
1168 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1170 #endif
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1172 #if defined(PETSC_HAVE_ELEMENTAL)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1174 #endif
1175 #if defined(PETSC_HAVE_SCALAPACK)
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1177 #endif
1178 #if defined(PETSC_HAVE_HYPRE)
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1188 #if defined(PETSC_HAVE_MKL_SPARSE)
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1190 #endif
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1193   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1200 {
1201   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1202   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1203   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1204   const PetscInt    *garray = aij->garray;
1205   const PetscScalar *aa,*ba;
1206   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1207   PetscInt          *rowlens;
1208   PetscInt          *colidxs;
1209   PetscScalar       *matvals;
1210   PetscErrorCode    ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1214 
1215   M  = mat->rmap->N;
1216   N  = mat->cmap->N;
1217   m  = mat->rmap->n;
1218   rs = mat->rmap->rstart;
1219   cs = mat->cmap->rstart;
1220   nz = A->nz + B->nz;
1221 
1222   /* write matrix header */
1223   header[0] = MAT_FILE_CLASSID;
1224   header[1] = M; header[2] = N; header[3] = nz;
1225   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1226   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1227 
1228   /* fill in and store row lengths  */
1229   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1230   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1231   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1232   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1233 
1234   /* fill in and store column indices */
1235   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1236   for (cnt=0, i=0; i<m; i++) {
1237     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       colidxs[cnt++] = garray[B->j[jb]];
1240     }
1241     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1242       colidxs[cnt++] = A->j[ja] + cs;
1243     for (; jb<B->i[i+1]; jb++)
1244       colidxs[cnt++] = garray[B->j[jb]];
1245   }
1246   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1247   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1248   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1249 
1250   /* fill in and store nonzero values */
1251   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1252   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1253   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1254   for (cnt=0, i=0; i<m; i++) {
1255     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1256       if (garray[B->j[jb]] > cs) break;
1257       matvals[cnt++] = ba[jb];
1258     }
1259     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1260       matvals[cnt++] = aa[ja];
1261     for (; jb<B->i[i+1]; jb++)
1262       matvals[cnt++] = ba[jb];
1263   }
1264   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1265   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1266   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1267   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1268   ierr = PetscFree(matvals);CHKERRQ(ierr);
1269 
1270   /* write block size option to the viewer's .info file */
1271   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
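/*
   Example (added sketch, not part of the original source): the binary format written above
   (classid header, global sizes, nonzero count, row lengths, column indices, values) is the
   same format read back by MatLoad().  A minimal round trip, assuming an assembled MATMPIAIJ
   matrix A and a hypothetical file name "mat.dat", might look like

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);            // on a parallel communicator this reaches MatView_MPIAIJ_Binary()
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);            // reads the header, row lengths, indices and values
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
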
1275 #include <petscdraw.h>
1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277 {
1278   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode    ierr;
1280   PetscMPIInt       rank = aij->rank,size = aij->size;
1281   PetscBool         isdraw,iascii,isbinary;
1282   PetscViewer       sviewer;
1283   PetscViewerFormat format;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1288   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1289   if (iascii) {
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1292       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1293       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1294       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1295       for (i=0; i<(PetscInt)size; i++) {
1296         nmax = PetscMax(nmax,nz[i]);
1297         nmin = PetscMin(nmin,nz[i]);
1298         navg += nz[i];
1299       }
1300       ierr = PetscFree(nz);CHKERRQ(ierr);
1301       navg = navg/size;
1302       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr);
1303       PetscFunctionReturn(0);
1304     }
1305     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1306     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1307       MatInfo   info;
1308       PetscInt *inodes=NULL;
1309 
1310       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1311       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1312       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1313       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1314       if (!inodes) {
1315         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1316                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1317       } else {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1320       }
1321       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1323       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1325       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1328       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1329       PetscFunctionReturn(0);
1330     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1331       PetscInt inodecount,inodelimit,*inodes;
1332       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1333       if (inodes) {
1334         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr);
1335       } else {
1336         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1337       }
1338       PetscFunctionReturn(0);
1339     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1340       PetscFunctionReturn(0);
1341     }
1342   } else if (isbinary) {
1343     if (size == 1) {
1344       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1345       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1346     } else {
1347       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1348     }
1349     PetscFunctionReturn(0);
1350   } else if (iascii && size == 1) {
1351     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1352     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1353     PetscFunctionReturn(0);
1354   } else if (isdraw) {
1355     PetscDraw draw;
1356     PetscBool isnull;
1357     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1358     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1359     if (isnull) PetscFunctionReturn(0);
1360   }
1361 
1362   { /* assemble the entire matrix onto first processor */
1363     Mat A = NULL, Av;
1364     IS  isrow,iscol;
1365 
1366     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1367     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1368     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1369     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1370 /*  The commented code uses MatCreateSubMatrices instead */
1371 /*
1372     Mat *AA, A = NULL, Av;
1373     IS  isrow,iscol;
1374 
1375     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1376     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1377     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1378     if (rank == 0) {
1379        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1380        A    = AA[0];
1381        Av   = AA[0];
1382     }
1383     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1384 */
1385     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1386     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1387     /*
1388        Every process has to participate in drawing the matrix since the graphics waits are
1389        synchronized across all processes that share the PetscDraw object
1390     */
1391     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1392     if (rank == 0) {
1393       if (((PetscObject)mat)->name) {
1394         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1395       }
1396       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1397     }
1398     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1399     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1400     ierr = MatDestroy(&A);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1406 {
1407   PetscErrorCode ierr;
1408   PetscBool      iascii,isdraw,issocket,isbinary;
1409 
1410   PetscFunctionBegin;
1411   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1415   if (iascii || isdraw || isbinary || issocket) {
1416     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1422 {
1423   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1424   PetscErrorCode ierr;
1425   Vec            bb1 = NULL;
1426   PetscBool      hasop;
1427 
1428   PetscFunctionBegin;
1429   if (flag == SOR_APPLY_UPPER) {
1430     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1431     PetscFunctionReturn(0);
1432   }
1433 
1434   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1435     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1436   }
1437 
1438   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1439     if (flag & SOR_ZERO_INITIAL_GUESS) {
1440       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1441       its--;
1442     }
1443 
1444     while (its--) {
1445       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1446       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1447 
1448       /* update rhs: bb1 = bb - B*x */
1449       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1450       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1451 
1452       /* local sweep */
1453       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1454     }
1455   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1456     if (flag & SOR_ZERO_INITIAL_GUESS) {
1457       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1458       its--;
1459     }
1460     while (its--) {
1461       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463 
1464       /* update rhs: bb1 = bb - B*x */
1465       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1466       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1467 
1468       /* local sweep */
1469       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1470     }
1471   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1472     if (flag & SOR_ZERO_INITIAL_GUESS) {
1473       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474       its--;
1475     }
1476     while (its--) {
1477       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479 
1480       /* update rhs: bb1 = bb - B*x */
1481       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1482       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1483 
1484       /* local sweep */
1485       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1486     }
1487   } else if (flag & SOR_EISENSTAT) {
1488     Vec xx1;
1489 
1490     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1491     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1492 
1493     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495     if (!mat->diag) {
1496       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1497       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1498     }
1499     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1500     if (hasop) {
1501       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1502     } else {
1503       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1504     }
1505     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1506 
1507     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1508 
1509     /* local sweep */
1510     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1511     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1512     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1513   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1514 
1515   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1516 
1517   matin->factorerrortype = mat->A->factorerrortype;
1518   PetscFunctionReturn(0);
1519 }
1520 
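/*
   Example (added sketch, not part of the original source): MatSOR() is usually reached through
   PCSOR, which drives the local sweeps implemented above.  Assuming an assembled MATMPIAIJ
   matrix A and vectors b and x with compatible layouts, a direct call and the usual KSP/PC
   route might look like

     // one local symmetric sweep with omega = 1.0 and a zero initial guess
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

     // or, through the solver interface (equivalent to -pc_type sor on the command line)
     KSP ksp; PC pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/
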
1521 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1522 {
1523   Mat            aA,aB,Aperm;
1524   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1525   PetscScalar    *aa,*ba;
1526   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1527   PetscSF        rowsf,sf;
1528   IS             parcolp = NULL;
1529   PetscBool      done;
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1534   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1535   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1536   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1537 
1538   /* Invert row permutation to find out where my rows should go */
1539   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1540   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1541   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1542   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1543   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1544   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1545 
1546   /* Invert column permutation to find out where my columns should go */
1547   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1548   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1549   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1550   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1551   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1552   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1553   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1554 
1555   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1556   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1557   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1558 
1559   /* Find out where my gcols should go */
1560   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1561   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1562   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1563   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1564   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1565   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1566   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1568 
1569   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1570   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1571   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1572   for (i=0; i<m; i++) {
1573     PetscInt    row = rdest[i];
1574     PetscMPIInt rowner;
1575     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1576     for (j=ai[i]; j<ai[i+1]; j++) {
1577       PetscInt    col = cdest[aj[j]];
1578       PetscMPIInt cowner;
1579       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1580       if (rowner == cowner) dnnz[i]++;
1581       else onnz[i]++;
1582     }
1583     for (j=bi[i]; j<bi[i+1]; j++) {
1584       PetscInt    col = gcdest[bj[j]];
1585       PetscMPIInt cowner;
1586       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1587       if (rowner == cowner) dnnz[i]++;
1588       else onnz[i]++;
1589     }
1590   }
1591   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1594   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1596 
1597   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1598   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1599   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1600   for (i=0; i<m; i++) {
1601     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1602     PetscInt j0,rowlen;
1603     rowlen = ai[i+1] - ai[i];
1604     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the values in batches of at most m */
1605       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1606       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1607     }
1608     rowlen = bi[i+1] - bi[i];
1609     for (j0=j=0; j<rowlen; j0=j) {
1610       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1611       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1612     }
1613   }
1614   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1615   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1616   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1617   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1618   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1619   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1620   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1621   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1622   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1623   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1624   *B = Aperm;
1625   PetscFunctionReturn(0);
1626 }
1627 
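/*
   Example (added sketch, not part of the original source): the routine above is what MatPermute()
   dispatches to for MATMPIAIJ.  Assuming index sets rowperm and colperm that together describe a
   global permutation (each process supplying its local part), a typical call is

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     // ... use Aperm ...
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/
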
1628 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1629 {
1630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1635   if (ghosts) *ghosts = aij->garray;
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1640 {
1641   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1642   Mat            A    = mat->A,B = mat->B;
1643   PetscErrorCode ierr;
1644   PetscLogDouble isend[5],irecv[5];
1645 
1646   PetscFunctionBegin;
1647   info->block_size = 1.0;
1648   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1649 
1650   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1651   isend[3] = info->memory;  isend[4] = info->mallocs;
1652 
1653   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1654 
1655   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1656   isend[3] += info->memory;  isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
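/*
   Example (added sketch, not part of the original source): MatGetInfo() above combines the
   statistics of the diagonal (A) and off-diagonal (B) blocks.  A minimal query of the global
   totals, assuming an assembled MATMPIAIJ matrix A:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
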
1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1687 {
1688   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1689   PetscErrorCode ierr;
1690 
1691   PetscFunctionBegin;
1692   switch (op) {
1693   case MAT_NEW_NONZERO_LOCATIONS:
1694   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1695   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1696   case MAT_KEEP_NONZERO_PATTERN:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698   case MAT_USE_INODES:
1699   case MAT_IGNORE_ZERO_ENTRIES:
1700   case MAT_FORM_EXPLICIT_TRANSPOSE:
1701     MatCheckPreallocated(A,1);
1702     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1703     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1704     break;
1705   case MAT_ROW_ORIENTED:
1706     MatCheckPreallocated(A,1);
1707     a->roworiented = flg;
1708 
1709     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1710     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1711     break;
1712   case MAT_FORCE_DIAGONAL_ENTRIES:
1713   case MAT_SORTED_FULL:
1714     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1715     break;
1716   case MAT_IGNORE_OFF_PROC_ENTRIES:
1717     a->donotstash = flg;
1718     break;
1719   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1720   case MAT_SPD:
1721   case MAT_SYMMETRIC:
1722   case MAT_STRUCTURALLY_SYMMETRIC:
1723   case MAT_HERMITIAN:
1724   case MAT_SYMMETRY_ETERNAL:
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
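/*
   Example (added sketch, not part of the original source): the options handled above are set
   through the public MatSetOption() interface, typically after preallocation and before
   assembly, e.g. assuming a MATMPIAIJ matrix A:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);     // drop off-process entries instead of stashing them
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);  // error if an entry falls outside the preallocation
*/
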
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1739 {
1740   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1741   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1742   PetscErrorCode ierr;
1743   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1744   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1745   PetscInt       *cmap,*idx_p;
1746 
1747   PetscFunctionBegin;
1748   PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1749   mat->getrowactive = PETSC_TRUE;
1750 
1751   if (!mat->rowvalues && (idx || v)) {
1752     /*
1753         allocate enough space to hold information from the longest row.
1754     */
1755     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1756     PetscInt   max = 1,tmp;
1757     for (i=0; i<matin->rmap->n; i++) {
1758       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1759       if (max < tmp) max = tmp;
1760     }
1761     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1762   }
1763 
1764   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1765   lrow = row - rstart;
1766 
1767   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1768   if (!v)   {pvA = NULL; pvB = NULL;}
1769   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1770   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1771   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1772   nztot = nzA + nzB;
1773 
1774   cmap = mat->garray;
1775   if (v  || idx) {
1776     if (nztot) {
1777       /* Sort by increasing column numbers, assuming A and B already sorted */
1778       PetscInt imark = -1;
1779       if (v) {
1780         *v = v_p = mat->rowvalues;
1781         for (i=0; i<nzB; i++) {
1782           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1783           else break;
1784         }
1785         imark = i;
1786         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1787         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1788       }
1789       if (idx) {
1790         *idx = idx_p = mat->rowindices;
1791         if (imark > -1) {
1792           for (i=0; i<imark; i++) {
1793             idx_p[i] = cmap[cworkB[i]];
1794           }
1795         } else {
1796           for (i=0; i<nzB; i++) {
1797             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1798             else break;
1799           }
1800           imark = i;
1801         }
1802         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1803         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1804       }
1805     } else {
1806       if (idx) *idx = NULL;
1807       if (v)   *v   = NULL;
1808     }
1809   }
1810   *nz  = nztot;
1811   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1817 {
1818   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1819 
1820   PetscFunctionBegin;
1821   PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1822   aij->getrowactive = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 
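/*
   Example (added sketch, not part of the original source): MatGetRow()/MatRestoreRow() on a
   MATMPIAIJ matrix may only be called for locally owned rows, as enforced above.  A typical
   traversal of the local rows of an assembled matrix A:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       // ... inspect cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
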
1826 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1827 {
1828   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1829   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1830   PetscErrorCode  ierr;
1831   PetscInt        i,j,cstart = mat->cmap->rstart;
1832   PetscReal       sum = 0.0;
1833   const MatScalar *v,*amata,*bmata;
1834 
1835   PetscFunctionBegin;
1836   if (aij->size == 1) {
1837     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1838   } else {
1839     ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr);
1840     ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1841     if (type == NORM_FROBENIUS) {
1842       v = amata;
1843       for (i=0; i<amat->nz; i++) {
1844         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1845       }
1846       v = bmata;
1847       for (i=0; i<bmat->nz; i++) {
1848         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1849       }
1850       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1851       *norm = PetscSqrtReal(*norm);
1852       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1853     } else if (type == NORM_1) { /* max column norm */
1854       PetscReal *tmp,*tmp2;
1855       PetscInt  *jj,*garray = aij->garray;
1856       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1857       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1858       *norm = 0.0;
1859       v     = amata; jj = amat->j;
1860       for (j=0; j<amat->nz; j++) {
1861         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1862       }
1863       v = bmata; jj = bmat->j;
1864       for (j=0; j<bmat->nz; j++) {
1865         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1866       }
1867       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1868       for (j=0; j<mat->cmap->N; j++) {
1869         if (tmp2[j] > *norm) *norm = tmp2[j];
1870       }
1871       ierr = PetscFree(tmp);CHKERRQ(ierr);
1872       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1873       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1874     } else if (type == NORM_INFINITY) { /* max row norm */
1875       PetscReal ntemp = 0.0;
1876       for (j=0; j<aij->A->rmap->n; j++) {
1877         v   = amata + amat->i[j];
1878         sum = 0.0;
1879         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1880           sum += PetscAbsScalar(*v); v++;
1881         }
1882         v = bmata + bmat->i[j];
1883         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1884           sum += PetscAbsScalar(*v); v++;
1885         }
1886         if (sum > ntemp) ntemp = sum;
1887       }
1888       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1889       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1890     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1891     ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr);
1892     ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
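/*
   Example (added sketch, not part of the original source): the three norms computed above are
   requested through MatNorm(); the two-norm is not supported for this type.

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          // maximum column sum of |a_ij|
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);  // sqrt of the sum of |a_ij|^2
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr); // maximum row sum of |a_ij|
*/
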
1897 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1898 {
1899   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1900   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1901   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1902   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1903   PetscErrorCode  ierr;
1904   Mat             B,A_diag,*B_diag;
1905   const MatScalar *pbv,*bv;
1906 
1907   PetscFunctionBegin;
1908   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1909   ai = Aloc->i; aj = Aloc->j;
1910   bi = Bloc->i; bj = Bloc->j;
1911   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1912     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1913     PetscSFNode          *oloc;
1914     PETSC_UNUSED PetscSF sf;
1915 
1916     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1917     /* compute d_nnz for preallocation */
1918     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1919     for (i=0; i<ai[ma]; i++) {
1920       d_nnz[aj[i]]++;
1921     }
1922     /* compute local off-diagonal contributions */
1923     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1924     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1925     /* map those to global */
1926     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1927     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1928     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1929     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1930     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1932     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1933 
1934     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1935     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1936     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1937     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1938     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1939     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1940   } else {
1941     B    = *matout;
1942     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1943   }
1944 
1945   b           = (Mat_MPIAIJ*)B->data;
1946   A_diag      = a->A;
1947   B_diag      = &b->A;
1948   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1949   A_diag_ncol = A_diag->cmap->N;
1950   B_diag_ilen = sub_B_diag->ilen;
1951   B_diag_i    = sub_B_diag->i;
1952 
1953   /* Set ilen for diagonal of B */
1954   for (i=0; i<A_diag_ncol; i++) {
1955     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1956   }
1957 
1958   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1959   very quickly (that is, without using MatSetValues()), because all writes are local. */
1960   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1961 
1962   /* copy over the B part */
1963   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1964   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1965   pbv  = bv;
1966   row  = A->rmap->rstart;
1967   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1968   cols_tmp = cols;
1969   for (i=0; i<mb; i++) {
1970     ncol = bi[i+1]-bi[i];
1971     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1972     row++;
1973     pbv += ncol; cols_tmp += ncol;
1974   }
1975   ierr = PetscFree(cols);CHKERRQ(ierr);
1976   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1977 
1978   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1980   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1981     *matout = B;
1982   } else {
1983     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
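/*
   Example (added sketch, not part of the original source): the reuse modes handled above are
   chosen by the caller of MatTranspose(), assuming an assembled MATMPIAIJ matrix A:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); // create A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);   // refill At after A's values changed (same pattern)
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  // replace A by its transpose
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/
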
1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1989 {
1990   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1991   Mat            a    = aij->A,b = aij->B;
1992   PetscErrorCode ierr;
1993   PetscInt       s1,s2,s3;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1997   if (rr) {
1998     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1999     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2000     /* Overlap communication with computation. */
2001     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2002   }
2003   if (ll) {
2004     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2005     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2006     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2007   }
2008   /* scale  the diagonal block */
2009   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2010 
2011   if (rr) {
2012     /* Do a scatter end and then right scale the off-diagonal block */
2013     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2014     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020 {
2021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2030 {
2031   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2032   Mat            a,b,c,d;
2033   PetscBool      flg;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   a = matA->A; b = matA->B;
2038   c = matB->A; d = matB->B;
2039 
2040   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2041   if (flg) {
2042     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2043   }
2044   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2049 {
2050   PetscErrorCode ierr;
2051   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2052   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2053 
2054   PetscFunctionBegin;
2055   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2056   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2057     /* Because of the column compression in the off-process part of the matrix a->B,
2058        the number of columns in a->B and b->B may differ, hence we cannot call MatCopy()
2059        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2060        could be provided by first uncompressing the a->B matrices and then copying the
2061        submatrices */
2062     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2063   } else {
2064     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2065     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2066   }
2067   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2072 {
2073   PetscErrorCode ierr;
2074 
2075   PetscFunctionBegin;
2076   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 /*
2081    Computes the number of nonzeros per row needed for preallocation when X and Y
2082    have different nonzero structure.
2083 */
2084 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2085 {
2086   PetscInt       i,j,k,nzx,nzy;
2087 
2088   PetscFunctionBegin;
2089   /* Set the number of nonzeros in the new matrix */
2090   for (i=0; i<m; i++) {
2091     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2092     nzx = xi[i+1] - xi[i];
2093     nzy = yi[i+1] - yi[i];
2094     nnz[i] = 0;
2095     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2096       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2097       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2098       nnz[i]++;
2099     }
2100     for (; k<nzy; k++) nnz[i]++;
2101   }
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2106 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2107 {
2108   PetscErrorCode ierr;
2109   PetscInt       m = Y->rmap->N;
2110   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2111   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2112 
2113   PetscFunctionBegin;
2114   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2122 
2123   PetscFunctionBegin;
2124   if (str == SAME_NONZERO_PATTERN) {
2125     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2126     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2127   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2128     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2129   } else {
2130     Mat      B;
2131     PetscInt *nnz_d,*nnz_o;
2132 
2133     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2134     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2135     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2136     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2137     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2138     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2139     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2140     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2141     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2142     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2143     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
2144     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2145     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
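/*
   Example (added sketch, not part of the original source): the MatStructure argument selects
   between the three branches above.  Computing Y <- Y + 2*X for two assembled MATMPIAIJ matrices:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      // fastest path, identical patterns
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);    // X's pattern is contained in Y's
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); // general case, Y is rebuilt with merged preallocation
*/
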
2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2151 
2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2153 {
2154 #if defined(PETSC_USE_COMPLEX)
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2157 
2158   PetscFunctionBegin;
2159   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2160   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2161 #else
2162   PetscFunctionBegin;
2163 #endif
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2174   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2179 {
2180   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2185   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2190 {
2191   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2192   PetscErrorCode    ierr;
2193   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2194   PetscScalar       *va,*vv;
2195   Vec               vB,vA;
2196   const PetscScalar *vb;
2197 
2198   PetscFunctionBegin;
2199   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2200   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2201 
2202   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2203   if (idx) {
2204     for (i=0; i<m; i++) {
2205       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2206     }
2207   }
2208 
2209   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2210   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2211   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2212 
2213   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2214   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2215   for (i=0; i<m; i++) {
2216     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2217       vv[i] = vb[i];
2218       if (idx) idx[i] = a->garray[idxb[i]];
2219     } else {
2220       vv[i] = va[i];
2221       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2222         idx[i] = a->garray[idxb[i]];
2223     }
2224   }
2225   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2226   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2227   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2228   ierr = PetscFree(idxb);CHKERRQ(ierr);
2229   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2230   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
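/*
   Example (added sketch, not part of the original source): the row max/min routines in this file
   return one value per local row and, optionally, the global column index where it occurs.  For
   MatGetRowMaxAbs(), assuming an assembled MATMPIAIJ matrix A:

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);   // left vector, laid out like A's rows
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/
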
2234 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2235 {
2236   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2237   PetscInt          m = A->rmap->n,n = A->cmap->n;
2238   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2239   PetscInt          *cmap  = mat->garray;
2240   PetscInt          *diagIdx, *offdiagIdx;
2241   Vec               diagV, offdiagV;
2242   PetscScalar       *a, *diagA, *offdiagA;
2243   const PetscScalar *ba,*bav;
2244   PetscInt          r,j,col,ncols,*bi,*bj;
2245   PetscErrorCode    ierr;
2246   Mat               B = mat->B;
2247   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2248 
2249   PetscFunctionBegin;
2250   /* When one process holds the entire matrix and the other processes have no entries */
2251   if (A->cmap->N == n) {
2252     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2253     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2254     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2255     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2256     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2257     PetscFunctionReturn(0);
2258   } else if (n == 0) {
2259     if (m) {
2260       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2261       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2262       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2263     }
2264     PetscFunctionReturn(0);
2265   }
2266 
2267   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2268   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2269   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2270   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2271 
2272   /* Get offdiagIdx[] for implicit 0.0 */
2273   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2274   ba   = bav;
2275   bi   = b->i;
2276   bj   = b->j;
2277   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2278   for (r = 0; r < m; r++) {
2279     ncols = bi[r+1] - bi[r];
2280     if (ncols == A->cmap->N - n) { /* Brow is dense */
2281       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2282     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal minimum in absolute value starts at 0.0 */
2283       offdiagA[r] = 0.0;
2284 
2285       /* Find first hole in the cmap */
2286       for (j=0; j<ncols; j++) {
2287         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2288         if (col > j && j < cstart) {
2289           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2290           break;
2291         } else if (col > j + n && j >= cstart) {
2292           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2293           break;
2294         }
2295       }
2296       if (j == ncols && ncols < A->cmap->N - n) {
2297         /* a hole is outside compressed Bcols */
2298         if (ncols == 0) {
2299           if (cstart) {
2300             offdiagIdx[r] = 0;
2301           } else offdiagIdx[r] = cend;
2302         } else { /* ncols > 0 */
2303           offdiagIdx[r] = cmap[ncols-1] + 1;
2304           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2305         }
2306       }
2307     }
2308 
2309     for (j=0; j<ncols; j++) {
2310       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2311       ba++; bj++;
2312     }
2313   }
2314 
2315   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2316   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2317   for (r = 0; r < m; ++r) {
2318     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2319       a[r]   = diagA[r];
2320       if (idx) idx[r] = cstart + diagIdx[r];
2321     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2322       a[r] = diagA[r];
2323       if (idx) {
2324         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2325           idx[r] = cstart + diagIdx[r];
2326         } else idx[r] = offdiagIdx[r];
2327       }
2328     } else {
2329       a[r]   = offdiagA[r];
2330       if (idx) idx[r] = offdiagIdx[r];
2331     }
2332   }
2333   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2334   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2335   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2336   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2337   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2338   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2339   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2344 {
2345   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2346   PetscInt          m = A->rmap->n,n = A->cmap->n;
2347   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2348   PetscInt          *cmap  = mat->garray;
2349   PetscInt          *diagIdx, *offdiagIdx;
2350   Vec               diagV, offdiagV;
2351   PetscScalar       *a, *diagA, *offdiagA;
2352   const PetscScalar *ba,*bav;
2353   PetscInt          r,j,col,ncols,*bi,*bj;
2354   PetscErrorCode    ierr;
2355   Mat               B = mat->B;
2356   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2357 
2358   PetscFunctionBegin;
2359   /* When one process holds the entire matrix and the other processes have no entries */
2360   if (A->cmap->N == n) {
2361     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2362     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2363     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2364     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2365     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2366     PetscFunctionReturn(0);
2367   } else if (n == 0) {
2368     if (m) {
2369       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2370       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2371       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2372     }
2373     PetscFunctionReturn(0);
2374   }
2375 
2376   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2377   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2378   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2379   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2380 
2381   /* Get offdiagIdx[] for implicit 0.0 */
2382   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2383   ba   = bav;
2384   bi   = b->i;
2385   bj   = b->j;
2386   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2387   for (r = 0; r < m; r++) {
2388     ncols = bi[r+1] - bi[r];
2389     if (ncols == A->cmap->N - n) { /* Brow is dense */
2390       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2391     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal minimum starts at 0.0 */
2392       offdiagA[r] = 0.0;
2393 
2394       /* Find first hole in the cmap */
2395       for (j=0; j<ncols; j++) {
2396         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2397         if (col > j && j < cstart) {
2398           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2399           break;
2400         } else if (col > j + n && j >= cstart) {
2401           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2402           break;
2403         }
2404       }
2405       if (j == ncols && ncols < A->cmap->N - n) {
2406         /* a hole is outside compressed Bcols */
2407         if (ncols == 0) {
2408           if (cstart) {
2409             offdiagIdx[r] = 0;
2410           } else offdiagIdx[r] = cend;
2411         } else { /* ncols > 0 */
2412           offdiagIdx[r] = cmap[ncols-1] + 1;
2413           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2414         }
2415       }
2416     }
2417 
2418     for (j=0; j<ncols; j++) {
2419       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2420       ba++; bj++;
2421     }
2422   }
2423 
2424   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2425   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2426   for (r = 0; r < m; ++r) {
2427     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2428       a[r]   = diagA[r];
2429       if (idx) idx[r] = cstart + diagIdx[r];
2430     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2431       a[r] = diagA[r];
2432       if (idx) {
2433         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2434           idx[r] = cstart + diagIdx[r];
2435         } else idx[r] = offdiagIdx[r];
2436       }
2437     } else {
2438       a[r]   = offdiagA[r];
2439       if (idx) idx[r] = offdiagIdx[r];
2440     }
2441   }
2442   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2443   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2444   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2445   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2446   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2448   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2453 {
2454   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2455   PetscInt          m = A->rmap->n,n = A->cmap->n;
2456   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2457   PetscInt          *cmap  = mat->garray;
2458   PetscInt          *diagIdx, *offdiagIdx;
2459   Vec               diagV, offdiagV;
2460   PetscScalar       *a, *diagA, *offdiagA;
2461   const PetscScalar *ba,*bav;
2462   PetscInt          r,j,col,ncols,*bi,*bj;
2463   PetscErrorCode    ierr;
2464   Mat               B = mat->B;
2465   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2466 
2467   PetscFunctionBegin;
2468   /* When one process holds the entire matrix and the other processes have no entries */
2469   if (A->cmap->N == n) {
2470     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2471     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2472     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2473     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2474     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2475     PetscFunctionReturn(0);
2476   } else if (n == 0) {
2477     if (m) {
2478       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2479       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2480       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2481     }
2482     PetscFunctionReturn(0);
2483   }
2484 
2485   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2488   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2492   ba   = bav;
2493   bi   = b->i;
2494   bj   = b->j;
2495   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r+1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2500     } else { /* Brow is sparse, so there is at least one implicit 0.0 and the off-diagonal maximum starts at 0.0 */
2501       offdiagA[r] = 0.0;
2502 
2503       /* Find first hole in the cmap */
2504       for (j=0; j<ncols; j++) {
2505         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2506         if (col > j && j < cstart) {
2507           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2508           break;
2509         } else if (col > j + n && j >= cstart) {
2510           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2511           break;
2512         }
2513       }
2514       if (j == ncols && ncols < A->cmap->N - n) {
2515         /* a hole is outside compressed Bcols */
2516         if (ncols == 0) {
2517           if (cstart) {
2518             offdiagIdx[r] = 0;
2519           } else offdiagIdx[r] = cend;
2520         } else { /* ncols > 0 */
2521           offdiagIdx[r] = cmap[ncols-1] + 1;
2522           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2523         }
2524       }
2525     }
2526 
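    /* scan the stored entries of this row of B, keeping the running maximum and its global column; the candidate set above (implicit zero or first entry) is the starting value */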
2527     for (j=0; j<ncols; j++) {
2528       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2529       ba++; bj++;
2530     }
2531   }
2532 
2533   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2534   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2535   for (r = 0; r < m; ++r) {
2536     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2537       a[r] = diagA[r];
2538       if (idx) idx[r] = cstart + diagIdx[r];
2539     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2540       a[r] = diagA[r];
2541       if (idx) {
2542         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2543           idx[r] = cstart + diagIdx[r];
2544         } else idx[r] = offdiagIdx[r];
2545       }
2546     } else {
2547       a[r] = offdiagA[r];
2548       if (idx) idx[r] = offdiagIdx[r];
2549     }
2550   }
2551   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2552   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2553   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2554   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2555   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2556   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2557   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2562 {
2563   PetscErrorCode ierr;
2564   Mat            *dummy;
2565 
2566   PetscFunctionBegin;
2567   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2568   *newmat = *dummy;
2569   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2574 {
2575   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2580   A->factorerrortype = a->A->factorerrortype;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2585 {
2586   PetscErrorCode ierr;
2587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2588 
2589   PetscFunctionBegin;
2590   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2591   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2592   if (x->assembled) {
2593     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2594   } else {
2595     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2596   }
2597   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2598   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2603 {
2604   PetscFunctionBegin;
2605   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2606   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 /*@
2611    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2612 
2613    Collective on Mat
2614 
2615    Input Parameters:
2616 +    A - the matrix
2617 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
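
   Example usage (a minimal sketch; assumes A is a MATMPIAIJ and that nis, is, and ov have been set up by the caller):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr); /* now uses the scalable algorithm */
.ve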
2618 
2619    Level: advanced
2620 
2621 @*/
2622 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2623 {
2624   PetscErrorCode       ierr;
2625 
2626   PetscFunctionBegin;
2627   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2632 {
2633   PetscErrorCode       ierr;
2634   PetscBool            sc = PETSC_FALSE,flg;
2635 
2636   PetscFunctionBegin;
2637   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2638   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2639   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2640   if (flg) {
2641     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2642   }
2643   ierr = PetscOptionsTail();CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2648 {
2649   PetscErrorCode ierr;
2650   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2651   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2652 
2653   PetscFunctionBegin;
2654   if (!Y->preallocated) {
2655     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2656   } else if (!aij->nz) {
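    /* the diagonal block has no stored nonzeros: preallocate one entry per row so the shift can be inserted, saving and restoring the nonew option around the preallocation call */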
2657     PetscInt nonew = aij->nonew;
2658     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2659     aij->nonew = nonew;
2660   }
2661   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2666 {
2667   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2668   PetscErrorCode ierr;
2669 
2670   PetscFunctionBegin;
2671   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2672   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2673   if (d) {
2674     PetscInt rstart;
2675     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2676     *d += rstart;
2677 
2678   }
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2683 {
2684   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2685   PetscErrorCode ierr;
2686 
2687   PetscFunctionBegin;
2688   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 /* -------------------------------------------------------------------*/
2693 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2694                                        MatGetRow_MPIAIJ,
2695                                        MatRestoreRow_MPIAIJ,
2696                                        MatMult_MPIAIJ,
2697                                 /* 4*/ MatMultAdd_MPIAIJ,
2698                                        MatMultTranspose_MPIAIJ,
2699                                        MatMultTransposeAdd_MPIAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                 /*10*/ NULL,
2704                                        NULL,
2705                                        NULL,
2706                                        MatSOR_MPIAIJ,
2707                                        MatTranspose_MPIAIJ,
2708                                 /*15*/ MatGetInfo_MPIAIJ,
2709                                        MatEqual_MPIAIJ,
2710                                        MatGetDiagonal_MPIAIJ,
2711                                        MatDiagonalScale_MPIAIJ,
2712                                        MatNorm_MPIAIJ,
2713                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2714                                        MatAssemblyEnd_MPIAIJ,
2715                                        MatSetOption_MPIAIJ,
2716                                        MatZeroEntries_MPIAIJ,
2717                                 /*24*/ MatZeroRows_MPIAIJ,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                 /*29*/ MatSetUp_MPIAIJ,
2723                                        NULL,
2724                                        NULL,
2725                                        MatGetDiagonalBlock_MPIAIJ,
2726                                        NULL,
2727                                 /*34*/ MatDuplicate_MPIAIJ,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        NULL,
2732                                 /*39*/ MatAXPY_MPIAIJ,
2733                                        MatCreateSubMatrices_MPIAIJ,
2734                                        MatIncreaseOverlap_MPIAIJ,
2735                                        MatGetValues_MPIAIJ,
2736                                        MatCopy_MPIAIJ,
2737                                 /*44*/ MatGetRowMax_MPIAIJ,
2738                                        MatScale_MPIAIJ,
2739                                        MatShift_MPIAIJ,
2740                                        MatDiagonalSet_MPIAIJ,
2741                                        MatZeroRowsColumns_MPIAIJ,
2742                                 /*49*/ MatSetRandom_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2748                                        NULL,
2749                                        MatSetUnfactored_MPIAIJ,
2750                                        MatPermute_MPIAIJ,
2751                                        NULL,
2752                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2753                                        MatDestroy_MPIAIJ,
2754                                        MatView_MPIAIJ,
2755                                        NULL,
2756                                        NULL,
2757                                 /*64*/ NULL,
2758                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2763                                        MatGetRowMinAbs_MPIAIJ,
2764                                        NULL,
2765                                        NULL,
2766                                        NULL,
2767                                        NULL,
2768                                 /*75*/ MatFDColoringApply_AIJ,
2769                                        MatSetFromOptions_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        MatFindZeroDiagonals_MPIAIJ,
2773                                 /*80*/ NULL,
2774                                        NULL,
2775                                        NULL,
2776                                 /*83*/ MatLoad_MPIAIJ,
2777                                        MatIsSymmetric_MPIAIJ,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                 /*89*/ NULL,
2783                                        NULL,
2784                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatBindToCPU_MPIAIJ,
2792                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2793                                        NULL,
2794                                        NULL,
2795                                        MatConjugate_MPIAIJ,
2796                                        NULL,
2797                                 /*104*/MatSetValuesRow_MPIAIJ,
2798                                        MatRealPart_MPIAIJ,
2799                                        MatImaginaryPart_MPIAIJ,
2800                                        NULL,
2801                                        NULL,
2802                                 /*109*/NULL,
2803                                        NULL,
2804                                        MatGetRowMin_MPIAIJ,
2805                                        NULL,
2806                                        MatMissingDiagonal_MPIAIJ,
2807                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2808                                        NULL,
2809                                        MatGetGhosts_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatGetMultiProcBlock_MPIAIJ,
2817                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2818                                        MatGetColumnReductions_MPIAIJ,
2819                                        MatInvertBlockDiagonal_MPIAIJ,
2820                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2821                                        MatCreateSubMatricesMPI_MPIAIJ,
2822                                 /*129*/NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2826                                        NULL,
2827                                 /*134*/NULL,
2828                                        NULL,
2829                                        NULL,
2830                                        NULL,
2831                                        NULL,
2832                                 /*139*/MatSetBlockSizes_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        MatFDColoringSetUp_MPIXAIJ,
2836                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2837                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2838                                 /*145*/NULL,
2839                                        NULL,
2840                                        NULL
2841 };
2842 
2843 /* ----------------------------------------------------------------------------------------*/
2844 
2845 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2846 {
2847   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2852   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2857 {
2858   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2859   PetscErrorCode ierr;
2860 
2861   PetscFunctionBegin;
2862   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2863   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2868 {
2869   Mat_MPIAIJ     *b;
2870   PetscErrorCode ierr;
2871   PetscMPIInt    size;
2872 
2873   PetscFunctionBegin;
2874   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2875   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2876   b = (Mat_MPIAIJ*)B->data;
2877 
2878 #if defined(PETSC_USE_CTABLE)
2879   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2880 #else
2881   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2882 #endif
2883   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2884   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2885   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2886 
2887   /* Because B's sizes may have changed, we simply destroy it and create a new one each time */
2888   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2889   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2890   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2891   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2892   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2893   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2894   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2895 
2896   if (!B->preallocated) {
2897     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2898     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2899     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2900     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2901     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2902   }
2903 
2904   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2905   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2906   B->preallocated  = PETSC_TRUE;
2907   B->was_assembled = PETSC_FALSE;
2908   B->assembled     = PETSC_FALSE;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2913 {
2914   Mat_MPIAIJ     *b;
2915   PetscErrorCode ierr;
2916 
2917   PetscFunctionBegin;
2918   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2919   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2920   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2921   b = (Mat_MPIAIJ*)B->data;
2922 
2923 #if defined(PETSC_USE_CTABLE)
2924   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2925 #else
2926   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2927 #endif
2928   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2929   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2930   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2931 
2932   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2933   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2934   B->preallocated  = PETSC_TRUE;
2935   B->was_assembled = PETSC_FALSE;
2936   B->assembled = PETSC_FALSE;
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2941 {
2942   Mat            mat;
2943   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2944   PetscErrorCode ierr;
2945 
2946   PetscFunctionBegin;
2947   *newmat = NULL;
2948   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2949   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2950   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2951   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2952   a       = (Mat_MPIAIJ*)mat->data;
2953 
2954   mat->factortype   = matin->factortype;
2955   mat->assembled    = matin->assembled;
2956   mat->insertmode   = NOT_SET_VALUES;
2957   mat->preallocated = matin->preallocated;
2958 
2959   a->size         = oldmat->size;
2960   a->rank         = oldmat->rank;
2961   a->donotstash   = oldmat->donotstash;
2962   a->roworiented  = oldmat->roworiented;
2963   a->rowindices   = NULL;
2964   a->rowvalues    = NULL;
2965   a->getrowactive = PETSC_FALSE;
2966 
2967   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2968   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2969 
2970   if (oldmat->colmap) {
2971 #if defined(PETSC_USE_CTABLE)
2972     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2973 #else
2974     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2975     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2976     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2977 #endif
2978   } else a->colmap = NULL;
2979   if (oldmat->garray) {
2980     PetscInt len;
2981     len  = oldmat->B->cmap->n;
2982     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2983     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2984     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2985   } else a->garray = NULL;
2986 
2987   /* MatDuplicate() may be called with a non-assembled matrix;
2988      in fact, MatDuplicate() only requires the matrix to be preallocated.
2989      This may happen inside a DMCreateMatrix_Shell() */
2990   if (oldmat->lvec) {
2991     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2992     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2993   }
2994   if (oldmat->Mvctx) {
2995     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2996     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2997   }
2998   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2999   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3000   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3001   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3002   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3003   *newmat = mat;
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3008 {
3009   PetscBool      isbinary, ishdf5;
3010   PetscErrorCode ierr;
3011 
3012   PetscFunctionBegin;
3013   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3014   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3015   /* force binary viewer to load .info file if it has not yet done so */
3016   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3017   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3018   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3019   if (isbinary) {
3020     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3021   } else if (ishdf5) {
3022 #if defined(PETSC_HAVE_HDF5)
3023     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3024 #else
3025     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3026 #endif
3027   } else {
3028     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3029   }
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3034 {
3035   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3036   PetscInt       *rowidxs,*colidxs;
3037   PetscScalar    *matvals;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3042 
3043   /* read in matrix header */
3044   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3045   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3046   M  = header[1]; N = header[2]; nz = header[3];
3047   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3048   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3049   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3050 
3051   /* set block sizes from the viewer's .info file */
3052   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3053   /* set global sizes if not set already */
3054   if (mat->rmap->N < 0) mat->rmap->N = M;
3055   if (mat->cmap->N < 0) mat->cmap->N = N;
3056   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3057   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3058 
3059   /* check if the matrix sizes are correct */
3060   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3061   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3062 
3063   /* read in row lengths and build row indices */
3064   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3065   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3066   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
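  /* turn the per-row counts just read into CSR row offsets with a running sum */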
3067   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3068   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3069   PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3070   /* read in column indices and matrix values */
3071   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3072   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3073   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3074   /* store matrix indices and values */
3075   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3076   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3077   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 /* Not scalable because of ISAllGather() unless getting all columns. */
3082 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3083 {
3084   PetscErrorCode ierr;
3085   IS             iscol_local;
3086   PetscBool      isstride;
3087   PetscMPIInt    lisstride=0,gisstride;
3088 
3089   PetscFunctionBegin;
3090   /* Check if we are grabbing all columns */
3091   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3092 
3093   if (isstride) {
3094     PetscInt  start,len,mstart,mlen;
3095     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3096     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3097     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3098     if (mstart == start && mlen-mstart == len) lisstride = 1;
3099   }
3100 
3101   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3102   if (gisstride) {
3103     PetscInt N;
3104     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3105     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3106     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3107     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3108   } else {
3109     PetscInt cbs;
3110     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3111     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3112     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3113   }
3114 
3115   *isseq = iscol_local;
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 /*
3120  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3121  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3122 
3123  Input Parameters:
3124    mat - matrix
3125    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3126            i.e., mat->rstart <= isrow[i] < mat->rend
3127    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3128            i.e., mat->cstart <= iscol[i] < mat->cend
3129  Output Parameters:
3130    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3131    iscol_o - sequential column index set for retrieving mat->B
3132    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3133  */
3134 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3135 {
3136   PetscErrorCode ierr;
3137   Vec            x,cmap;
3138   const PetscInt *is_idx;
3139   PetscScalar    *xarray,*cmaparray;
3140   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3141   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3142   Mat            B=a->B;
3143   Vec            lvec=a->lvec,lcmap;
3144   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3145   MPI_Comm       comm;
3146   VecScatter     Mvctx=a->Mvctx;
3147 
3148   PetscFunctionBegin;
3149   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3150   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3151 
3152   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3153   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3154   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3155   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3156   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3157 
3158   /* Get start indices */
3159   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3160   isstart -= ncols;
3161   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3162 
3163   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3164   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3165   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3166   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3167   for (i=0; i<ncols; i++) {
3168     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3169     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3170     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3171   }
3172   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3173   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3174   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3175 
3176   /* Get iscol_d */
3177   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3178   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3179   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3180 
3181   /* Get isrow_d */
3182   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3183   rstart = mat->rmap->rstart;
3184   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3185   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3186   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3187   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3188 
3189   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3190   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3191   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3192 
3193   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3194   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3195   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3196 
3197   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3198 
3199   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3200   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3201 
3202   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3203   /* off-process column indices */
3204   count = 0;
3205   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3206   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3207 
3208   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3209   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3210   for (i=0; i<Bn; i++) {
3211     if (PetscRealPart(xarray[i]) > -1.0) {
3212       idx[count]     = i;                   /* local column index in off-diagonal part B */
3213       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3214       count++;
3215     }
3216   }
3217   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3218   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3219 
3220   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3221   /* cannot ensure iscol_o has same blocksize as iscol! */
3222 
3223   ierr = PetscFree(idx);CHKERRQ(ierr);
3224   *garray = cmap1;
3225 
3226   ierr = VecDestroy(&x);CHKERRQ(ierr);
3227   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3228   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3233 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3234 {
3235   PetscErrorCode ierr;
3236   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3237   Mat            M = NULL;
3238   MPI_Comm       comm;
3239   IS             iscol_d,isrow_d,iscol_o;
3240   Mat            Asub = NULL,Bsub = NULL;
3241   PetscInt       n;
3242 
3243   PetscFunctionBegin;
3244   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3245 
3246   if (call == MAT_REUSE_MATRIX) {
3247     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3248     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3249     PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3250 
3251     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3252     PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3253 
3254     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3255     PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3256 
3257     /* Update diagonal and off-diagonal portions of submat */
3258     asub = (Mat_MPIAIJ*)(*submat)->data;
3259     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3260     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3261     if (n) {
3262       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3263     }
3264     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266 
3267   } else { /* call == MAT_INITIAL_MATRIX */
3268     const PetscInt *garray;
3269     PetscInt        BsubN;
3270 
3271     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3272     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3273 
3274     /* Create local submatrices Asub and Bsub */
3275     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3276     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3277 
3278     /* Create submatrix M */
3279     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3280 
3281     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3282     asub = (Mat_MPIAIJ*)M->data;
3283 
3284     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3285     n = asub->B->cmap->N;
3286     if (BsubN > n) {
3287       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3288       const PetscInt *idx;
3289       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3290       ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3291 
3292       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3293       j = 0;
3294       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3295       for (i=0; i<n; i++) {
3296         if (j >= BsubN) break;
3297         while (subgarray[i] > garray[j]) j++;
3298 
3299         if (subgarray[i] == garray[j]) {
3300           idx_new[i] = idx[j++];
3301         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3302       }
3303       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3304 
3305       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3306       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3307 
3308     } else if (BsubN < n) {
3309       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3310     }
3311 
3312     ierr = PetscFree(garray);CHKERRQ(ierr);
3313     *submat = M;
3314 
3315     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3316     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3317     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3318 
3319     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3320     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3321 
3322     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3323     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3324   }
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3329 {
3330   PetscErrorCode ierr;
3331   IS             iscol_local=NULL,isrow_d;
3332   PetscInt       csize;
3333   PetscInt       n,i,j,start,end;
3334   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3335   MPI_Comm       comm;
3336 
3337   PetscFunctionBegin;
3338   /* If isrow has same processor distribution as mat,
3339      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3340   if (call == MAT_REUSE_MATRIX) {
3341     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3342     if (isrow_d) {
3343       sameRowDist  = PETSC_TRUE;
3344       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3345     } else {
3346       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3347       if (iscol_local) {
3348         sameRowDist  = PETSC_TRUE;
3349         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3350       }
3351     }
3352   } else {
3353     /* Check if isrow has same processor distribution as mat */
3354     sameDist[0] = PETSC_FALSE;
3355     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3356     if (!n) {
3357       sameDist[0] = PETSC_TRUE;
3358     } else {
3359       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3360       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3361       if (i >= start && j < end) {
3362         sameDist[0] = PETSC_TRUE;
3363       }
3364     }
3365 
3366     /* Check if iscol has same processor distribution as mat */
3367     sameDist[1] = PETSC_FALSE;
3368     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3369     if (!n) {
3370       sameDist[1] = PETSC_TRUE;
3371     } else {
3372       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3373       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3374       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3375     }
3376 
3377     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3378     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3379     sameRowDist = tsameDist[0];
3380   }
3381 
3382   if (sameRowDist) {
3383     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3384       /* isrow and iscol have same processor distribution as mat */
3385       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3386       PetscFunctionReturn(0);
3387     } else { /* sameRowDist */
3388       /* isrow has same processor distribution as mat */
3389       if (call == MAT_INITIAL_MATRIX) {
3390         PetscBool sorted;
3391         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3392         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3393         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3394         PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3395 
3396         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3397         if (sorted) {
3398           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3399           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3400           PetscFunctionReturn(0);
3401         }
3402       } else { /* call == MAT_REUSE_MATRIX */
3403         IS iscol_sub;
3404         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3405         if (iscol_sub) {
3406           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3407           PetscFunctionReturn(0);
3408         }
3409       }
3410     }
3411   }
3412 
3413   /* General case: iscol -> iscol_local which has global size of iscol */
3414   if (call == MAT_REUSE_MATRIX) {
3415     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3416     PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3417   } else {
3418     if (!iscol_local) {
3419       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3420     }
3421   }
3422 
3423   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3424   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3425 
3426   if (call == MAT_INITIAL_MATRIX) {
3427     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3428     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3429   }
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 /*@C
3434      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3435          and "off-diagonal" parts of the matrix in CSR format.
3436 
3437    Collective
3438 
3439    Input Parameters:
3440 +  comm - MPI communicator
3441 .  A - "diagonal" portion of matrix
3442 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3443 -  garray - global index of B columns
3444 
3445    Output Parameter:
3446 .   mat - the matrix, with input A as its local diagonal matrix
3447    Level: advanced
3448 
3449    Notes:
3450        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3451        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
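
       Example (a minimal sketch; assumes comm, the local MATSEQAIJ matrices A and B, and a garray[] mapping
       B's columns to global column indices have all been built by the caller):
.vb
   ierr = MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&C);CHKERRQ(ierr);
   /* C now owns A as its diagonal block; B has been destroyed, so neither A nor B may be used again */
.ve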
3452 
3453 .seealso: MatCreateMPIAIJWithSplitArrays()
3454 @*/
3455 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3456 {
3457   PetscErrorCode    ierr;
3458   Mat_MPIAIJ        *maij;
3459   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3460   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3461   const PetscScalar *oa;
3462   Mat               Bnew;
3463   PetscInt          m,n,N;
3464 
3465   PetscFunctionBegin;
3466   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3467   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3468   PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3469   PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3470   /* the check below is removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its column block size may not be the same as A's */
3471   /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3472 
3473   /* Get global columns of mat */
3474   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3475 
3476   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3477   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3478   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3479   maij = (Mat_MPIAIJ*)(*mat)->data;
3480 
3481   (*mat)->preallocated = PETSC_TRUE;
3482 
3483   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3484   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3485 
3486   /* Set A as diagonal portion of *mat */
3487   maij->A = A;
3488 
3489   nz = oi[m];
3490   for (i=0; i<nz; i++) {
3491     col   = oj[i];
3492     oj[i] = garray[col];
3493   }
3494 
3495   /* Set Bnew as off-diagonal portion of *mat */
3496   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3497   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3498   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3499   bnew        = (Mat_SeqAIJ*)Bnew->data;
3500   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3501   maij->B     = Bnew;
3502 
3503   PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3504 
3505   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3506   b->free_a       = PETSC_FALSE;
3507   b->free_ij      = PETSC_FALSE;
3508   ierr = MatDestroy(&B);CHKERRQ(ierr);
3509 
3510   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3511   bnew->free_a       = PETSC_TRUE;
3512   bnew->free_ij      = PETSC_TRUE;
3513 
3514   /* condense columns of maij->B */
3515   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3516   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3517   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3518   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3519   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3520   PetscFunctionReturn(0);
3521 }
3522 
3523 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3524 
3525 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3526 {
3527   PetscErrorCode ierr;
3528   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3529   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3530   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3531   Mat            M,Msub,B=a->B;
3532   MatScalar      *aa;
3533   Mat_SeqAIJ     *aij;
3534   PetscInt       *garray = a->garray,*colsub,Ncols;
3535   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3536   IS             iscol_sub,iscmap;
3537   const PetscInt *is_idx,*cmap;
3538   PetscBool      allcolumns=PETSC_FALSE;
3539   MPI_Comm       comm;
3540 
3541   PetscFunctionBegin;
3542   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3543   if (call == MAT_REUSE_MATRIX) {
3544     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3545     PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3546     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3547 
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3549     PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3550 
3551     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3552     PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3553 
3554     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3555 
3556   } else { /* call == MAT_INITIAL_MATRIX */
3557     PetscBool flg;
3558 
3559     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3560     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3561 
3562     /* (1) iscol -> nonscalable iscol_local */
3563     /* Check for special case: each processor gets entire matrix columns */
3564     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3565     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3566     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3567     if (allcolumns) {
3568       iscol_sub = iscol_local;
3569       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3570       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3571 
3572     } else {
3573       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3574       PetscInt *idx,*cmap1,k;
3575       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3576       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3577       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3578       count = 0;
3579       k     = 0;
3580       for (i=0; i<Ncols; i++) {
3581         j = is_idx[i];
3582         if (j >= cstart && j < cend) {
3583           /* diagonal part of mat */
3584           idx[count]     = j;
3585           cmap1[count++] = i; /* column index in submat */
3586         } else if (Bn) {
3587           /* off-diagonal part of mat */
3588           if (j == garray[k]) {
3589             idx[count]     = j;
3590             cmap1[count++] = i;  /* column index in submat */
3591           } else if (j > garray[k]) {
3592             while (j > garray[k] && k < Bn-1) k++;
3593             if (j == garray[k]) {
3594               idx[count]     = j;
3595               cmap1[count++] = i; /* column index in submat */
3596             }
3597           }
3598         }
3599       }
3600       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3601 
3602       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3603       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3604       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3605 
3606       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3607     }
3608 
3609     /* (3) Create sequential Msub */
3610     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3611   }
3612 
3613   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3614   aij  = (Mat_SeqAIJ*)(Msub)->data;
3615   ii   = aij->i;
3616   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3617 
3618   /*
3619       m - number of local rows
3620       Ncols - number of columns (same on all processors)
3621       rstart - first row in new global matrix generated
3622   */
3623   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3624 
3625   if (call == MAT_INITIAL_MATRIX) {
3626     /* (4) Create parallel newmat */
3627     PetscMPIInt    rank,size;
3628     PetscInt       csize;
3629 
3630     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3631     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3632 
3633     /*
3634         Determine the number of non-zeros in the diagonal and off-diagonal
3635         portions of the matrix in order to do correct preallocation
3636     */
3637 
3638     /* first get start and end of "diagonal" columns */
3639     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3640     if (csize == PETSC_DECIDE) {
3641       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3642       if (mglobal == Ncols) { /* square matrix */
3643         nlocal = m;
3644       } else {
3645         nlocal = Ncols/size + ((Ncols % size) > rank);
3646       }
3647     } else {
3648       nlocal = csize;
3649     }
3650     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3651     rstart = rend - nlocal;
3652     PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3653 
3654     /* next, compute all the lengths */
3655     jj    = aij->j;
3656     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3657     olens = dlens + m;
3658     for (i=0; i<m; i++) {
3659       jend = ii[i+1] - ii[i];
3660       olen = 0;
3661       dlen = 0;
3662       for (j=0; j<jend; j++) {
3663         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3664         else dlen++;
3665         jj++;
3666       }
3667       olens[i] = olen;
3668       dlens[i] = dlen;
3669     }
3670 
3671     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3672     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3673 
3674     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3675     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3676     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3677     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3678     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3679     ierr = PetscFree(dlens);CHKERRQ(ierr);
3680 
3681   } else { /* call == MAT_REUSE_MATRIX */
3682     M    = *newmat;
3683     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3684     PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3685     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3686     /*
3687          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3688        rather than the slower MatSetValues().
3689     */
3690     M->was_assembled = PETSC_TRUE;
3691     M->assembled     = PETSC_FALSE;
3692   }
3693 
3694   /* (5) Set values of Msub to *newmat */
3695   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3696   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3697 
3698   jj   = aij->j;
3699   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3700   for (i=0; i<m; i++) {
3701     row = rstart + i;
3702     nz  = ii[i+1] - ii[i];
3703     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3704     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3705     jj += nz; aa += nz;
3706   }
3707   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3708   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3709 
3710   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712 
3713   ierr = PetscFree(colsub);CHKERRQ(ierr);
3714 
3715   /* save Msub, iscol_sub and iscmap used in processor for next request */
3716   if (call == MAT_INITIAL_MATRIX) {
3717     *newmat = M;
3718     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3719     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3720 
3721     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3722     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3723 
3724     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3725     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3726 
3727     if (iscol_local) {
3728       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3729       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3730     }
3731   }
3732   PetscFunctionReturn(0);
3733 }
3734 
3735 /*
3736     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3737   on each process, and then the final result assembled by concatenating the local matrices.
3738   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3739 
3740   Note: This requires a sequential iscol with all indices.
3741 */
3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3743 {
3744   PetscErrorCode ierr;
3745   PetscMPIInt    rank,size;
3746   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3747   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3748   Mat            M,Mreuse;
3749   MatScalar      *aa,*vwork;
3750   MPI_Comm       comm;
3751   Mat_SeqAIJ     *aij;
3752   PetscBool      colflag,allcolumns=PETSC_FALSE;
3753 
3754   PetscFunctionBegin;
3755   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3756   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3757   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3758 
3759   /* Check for special case: each processor gets entire matrix columns */
3760   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3761   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3762   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3763   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3764 
3765   if (call ==  MAT_REUSE_MATRIX) {
3766     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3767     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3768     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3769   } else {
3770     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3771   }
3772 
3773   /*
3774       m - number of local rows
3775       n - number of columns (same on all processors)
3776       rstart - first row in new global matrix generated
3777   */
3778   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3779   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3780   if (call == MAT_INITIAL_MATRIX) {
3781     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3782     ii  = aij->i;
3783     jj  = aij->j;
3784 
3785     /*
3786         Determine the number of non-zeros in the diagonal and off-diagonal
3787         portions of the matrix in order to do correct preallocation
3788     */
3789 
3790     /* first get start and end of "diagonal" columns */
3791     if (csize == PETSC_DECIDE) {
3792       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3793       if (mglobal == n) { /* square matrix */
3794         nlocal = m;
3795       } else {
3796         nlocal = n/size + ((n % size) > rank);
3797       }
3798     } else {
3799       nlocal = csize;
3800     }
3801     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3802     rstart = rend - nlocal;
3803     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3804 
3805     /* next, compute all the lengths */
3806     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3807     olens = dlens + m;
3808     for (i=0; i<m; i++) {
3809       jend = ii[i+1] - ii[i];
3810       olen = 0;
3811       dlen = 0;
3812       for (j=0; j<jend; j++) {
3813         if (*jj < rstart || *jj >= rend) olen++;
3814         else dlen++;
3815         jj++;
3816       }
3817       olens[i] = olen;
3818       dlens[i] = dlen;
3819     }
3820     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3821     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3822     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3823     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3824     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3825     ierr = PetscFree(dlens);CHKERRQ(ierr);
3826   } else {
3827     PetscInt ml,nl;
3828 
3829     M    = *newmat;
3830     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3831     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3832     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3833     /*
3834          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3835        rather than the slower MatSetValues().
3836     */
3837     M->was_assembled = PETSC_TRUE;
3838     M->assembled     = PETSC_FALSE;
3839   }
3840   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3841   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3842   ii   = aij->i;
3843   jj   = aij->j;
3844 
3845   /* trigger copy to CPU if needed */
3846   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3847   for (i=0; i<m; i++) {
3848     row   = rstart + i;
3849     nz    = ii[i+1] - ii[i];
3850     cwork = jj; jj += nz;
3851     vwork = aa; aa += nz;
3852     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3853   }
3854   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3855 
3856   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3857   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3858   *newmat = M;
3859 
3860   /* save submatrix used in processor for next request */
3861   if (call ==  MAT_INITIAL_MATRIX) {
3862     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3863     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3864   }
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3869 {
3870   PetscInt       m,cstart, cend,j,nnz,i,d;
3871   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3872   const PetscInt *JJ;
3873   PetscErrorCode ierr;
3874   PetscBool      nooffprocentries;
3875 
3876   PetscFunctionBegin;
3877   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3878 
3879   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3880   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3881   m      = B->rmap->n;
3882   cstart = B->cmap->rstart;
3883   cend   = B->cmap->rend;
3884   rstart = B->rmap->rstart;
3885 
3886   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3887 
3888   if (PetscDefined(USE_DEBUG)) {
3889     for (i=0; i<m; i++) {
3890       nnz = Ii[i+1]- Ii[i];
3891       JJ  = J + Ii[i];
3892       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3893       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3894       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3895     }
3896   }
3897 
3898   for (i=0; i<m; i++) {
3899     nnz     = Ii[i+1]- Ii[i];
3900     JJ      = J + Ii[i];
3901     nnz_max = PetscMax(nnz_max,nnz);
3902     d       = 0;
3903     for (j=0; j<nnz; j++) {
3904       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3905     }
3906     d_nnz[i] = d;
3907     o_nnz[i] = nnz - d;
3908   }
3909   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3910   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3911 
3912   for (i=0; i<m; i++) {
3913     ii   = i + rstart;
3914     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3915   }
3916   nooffprocentries    = B->nooffprocentries;
3917   B->nooffprocentries = PETSC_TRUE;
3918   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3920   B->nooffprocentries = nooffprocentries;
3921 
3922   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 /*@
3927    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3928    (the default parallel PETSc format).
3929 
3930    Collective
3931 
3932    Input Parameters:
3933 +  B - the matrix
3934 .  i - the indices into j for the start of each local row (starts with zero)
3935 .  j - the column indices for each local row (starts with zero)
3936 -  v - optional values in the matrix
3937 
3938    Level: developer
3939 
3940    Notes:
3941        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3942      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3943      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3944 
3945        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3946 
3947        The format used for the sparse matrix input is equivalent to a
3948     row-major ordering, i.e. for the following matrix, the input data expected is
3949     as shown:
3950 
3951 $        1 0 0
3952 $        2 0 3     P0
3953 $       -------
3954 $        4 5 6     P1
3955 $
3956 $     Process0 [P0]: rows_owned=[0,1]
3957 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3958 $        j =  {0,0,2}  [size = 3]
3959 $        v =  {1,2,3}  [size = 3]
3960 $
3961 $     Process1 [P1]: rows_owned=[2]
3962 $        i =  {0,3}    [size = nrow+1  = 1+1]
3963 $        j =  {0,1,2}  [size = 3]
3964 $        v =  {4,5,6}  [size = 3]
3965 
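     A minimal calling sketch (illustrative only, not taken from a PETSc example; i, j, v are the per-process arrays listed above for the 3x3 example, and nlocalrows is a hypothetical name for the local row count, 2 on P0 and 1 on P1):

.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,nlocalrows,PETSC_DECIDE,3,3);  /* nlocalrows: hypothetical, 2 on P0, 1 on P1 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);       /* preallocates, inserts the values, and assembles */
.ve
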
3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3967           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3968 @*/
3969 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3970 {
3971   PetscErrorCode ierr;
3972 
3973   PetscFunctionBegin;
3974   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 /*@C
3979    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3980    (the default parallel PETSc format).  For good matrix assembly performance
3981    the user should preallocate the matrix storage by setting the parameters
3982    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3983    performance can be increased by more than a factor of 50.
3984 
3985    Collective
3986 
3987    Input Parameters:
3988 +  B - the matrix
3989 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3990            (same value is used for all local rows)
3991 .  d_nnz - array containing the number of nonzeros in the various rows of the
3992            DIAGONAL portion of the local submatrix (possibly different for each row)
3993            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3994            The size of this array is equal to the number of local rows, i.e 'm'.
3995            For matrices that will be factored, you must leave room for (and set)
3996            the diagonal entry even if it is zero.
3997 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998            submatrix (same value is used for all local rows).
3999 -  o_nnz - array containing the number of nonzeros in the various rows of the
4000            OFF-DIAGONAL portion of the local submatrix (possibly different for
4001            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4002            structure. The size of this array is equal to the number
4003            of local rows, i.e 'm'.
4004 
4005    If the *_nnz parameter is given then the *_nz parameter is ignored
4006 
4007    The AIJ format (also called the Yale sparse matrix format or
4008    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4009    storage.  The stored row and column indices begin with zero.
4010    See Users-Manual: ch_mat for details.
4011 
4012    The parallel matrix is partitioned such that the first m0 rows belong to
4013    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4014    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4015 
4016    The DIAGONAL portion of the local submatrix of a processor can be defined
4017    as the submatrix which is obtained by extracting the part corresponding to
4018    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4019    first row that belongs to the processor, r2 is the last row belonging to
4020    this processor, and c1-c2 is the range of indices of the local part of a
4021    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4022    common case of a square matrix, the row and column ranges are the same and
4023    the DIAGONAL part is also square. The remaining portion of the local
4024    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4025 
4026    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4027 
4028    You can call MatGetInfo() to get information on how effective the preallocation was;
4029    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4030    You can also run with the option -info and look for messages with the string
4031    malloc in them to see if additional memory allocation was needed.
4032 
4033    Example usage:
4034 
4035    Consider the following 8x8 matrix with 34 non-zero values, that is
4036    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4037    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4038    as follows:
4039 
4040 .vb
4041             1  2  0  |  0  3  0  |  0  4
4042     Proc0   0  5  6  |  7  0  0  |  8  0
4043             9  0 10  | 11  0  0  | 12  0
4044     -------------------------------------
4045            13  0 14  | 15 16 17  |  0  0
4046     Proc1   0 18  0  | 19 20 21  |  0  0
4047             0  0  0  | 22 23  0  | 24  0
4048     -------------------------------------
4049     Proc2  25 26 27  |  0  0 28  | 29  0
4050            30  0  0  | 31 32 33  |  0 34
4051 .ve
4052 
4053    This can be represented as a collection of submatrices as:
4054 
4055 .vb
4056       A B C
4057       D E F
4058       G H I
4059 .ve
4060 
4061    Where the submatrices A,B,C are owned by proc0, D,E,F are
4062    owned by proc1, G,H,I are owned by proc2.
4063 
4064    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4066    The 'M','N' parameters are 8,8, and have the same values on all procs.
4067 
4068    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4069    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4070    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4071    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4072    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4073    matrix, ans [DF] as another SeqAIJ matrix.
4074 
4075    When d_nz, o_nz parameters are specified, d_nz storage elements are
4076    allocated for every row of the local diagonal submatrix, and o_nz
4077    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4078    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4079    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4080    In this case, the values of d_nz,o_nz are:
4081 .vb
4082      proc0 : dnz = 2, o_nz = 2
4083      proc1 : dnz = 3, o_nz = 2
4084      proc2 : dnz = 1, o_nz = 4
4085 .ve
4086    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4087    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4088    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4089    34 values.
4090 
4091    When d_nnz, o_nnz parameters are specified, the storage is specified
4092    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4093    In the above case the values for d_nnz,o_nnz are:
4094 .vb
4095      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4096      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4097      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4098 .ve
4099    Here the space allocated is the sum of all the above values, i.e. 34, and
4100    hence pre-allocation is perfect.
4101 
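   As a sketch (illustrative only, not from a PETSc example; using the proc0 values from the 8x8 example above):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2}; /* per-row counts for proc0 */
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);                          /* 3 local rows/columns of the 8x8 matrix */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);    /* *_nz ignored since *_nnz arrays are given */
.ve
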
4102    Level: intermediate
4103 
4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4105           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4106 @*/
4107 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4108 {
4109   PetscErrorCode ierr;
4110 
4111   PetscFunctionBegin;
4112   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4113   PetscValidType(B,1);
4114   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4115   PetscFunctionReturn(0);
4116 }
4117 
4118 /*@
4119      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4120          in standard CSR format.
4121 
4122    Collective
4123 
4124    Input Parameters:
4125 +  comm - MPI communicator
4126 .  m - number of local rows (Cannot be PETSC_DECIDE)
4127 .  n - This value should be the same as the local size used in creating the
4128        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4129        calculated if N is given) For square matrices n is almost always m.
4130 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4131 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4132 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4133 .   j - column indices
4134 -   a - matrix values
4135 
4136    Output Parameter:
4137 .   mat - the matrix
4138 
4139    Level: intermediate
4140 
4141    Notes:
4142        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4143      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4144      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4145 
4146        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4147 
4148        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4149 
4150        The format used for the sparse matrix input is equivalent to a
4151     row-major ordering, i.e. for the following matrix, the input data expected is
4152     as shown:
4153 
4154 $        1 0 0
4155 $        2 0 3     P0
4156 $       -------
4157 $        4 5 6     P1
4158 $
4159 $     Process0 [P0]: rows_owned=[0,1]
4160 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4161 $        j =  {0,0,2}  [size = 3]
4162 $        v =  {1,2,3}  [size = 3]
4163 $
4164 $     Process1 [P1]: rows_owned=[2]
4165 $        i =  {0,3}    [size = nrow+1  = 1+1]
4166 $        j =  {0,1,2}  [size = 3]
4167 $        v =  {4,5,6}  [size = 3]
4168 
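     A hedged calling sketch (illustrative only; shown for process P0 of the example above, whose local CSR arrays are i, j, v, with 2 local rows of a matrix with 3 global columns):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
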
4169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4170           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4171 @*/
4172 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4173 {
4174   PetscErrorCode ierr;
4175 
4176   PetscFunctionBegin;
4177   PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4178   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4180   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4181   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4182   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4183   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4184   PetscFunctionReturn(0);
4185 }
4186 
4187 /*@
4188      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4189          in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed originally
4190 
4191    Collective
4192 
4193    Input Parameters:
4194 +  mat - the matrix
4195 .  m - number of local rows (Cannot be PETSC_DECIDE)
4196 .  n - This value should be the same as the local size used in creating the
4197        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4198        calculated if N is given) For square matrices n is almost always m.
4199 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4200 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4201 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4202 .  J - column indices
4203 -  v - matrix values
4204 
4205    Level: intermediate
4206 
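   Notes:
     A sketch of the intended usage (illustrative only, not from a PETSc example; vnew is a hypothetical array holding the new values, laid out exactly like the original v):

.vb
     MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&A);
     /* ... later, with the same Ii and J but new numerical values in vnew (hypothetical) ... */
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,vnew);
.ve
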
4207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4208           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4209 @*/
4210 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4211 {
4212   PetscErrorCode ierr;
4213   PetscInt       cstart,nnz,i,j;
4214   PetscInt       *ld;
4215   PetscBool      nooffprocentries;
4216   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4217   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4218   PetscScalar    *ad,*ao;
4219   const PetscInt *Adi = Ad->i;
4220   PetscInt       ldi,Iii,md;
4221 
4222   PetscFunctionBegin;
4223   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4224   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4225   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4226   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4227 
4228   ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4229   ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4230   cstart = mat->cmap->rstart;
4231   if (!Aij->ld) {
4232     /* count number of entries below block diagonal */
4233     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4234     Aij->ld = ld;
4235     for (i=0; i<m; i++) {
4236       nnz  = Ii[i+1]- Ii[i];
4237       j     = 0;
4238       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first to avoid reading past the end of J */
4239       J    += nnz;
4240       ld[i] = j;
4241     }
4242   } else {
4243     ld = Aij->ld;
4244   }
4245 
4246   for (i=0; i<m; i++) {
4247     nnz  = Ii[i+1]- Ii[i];
4248     Iii  = Ii[i];
4249     ldi  = ld[i];
4250     md   = Adi[i+1]-Adi[i];
4251     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4252     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4253     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4254     ad  += md;
4255     ao  += nnz - md;
4256   }
4257   nooffprocentries      = mat->nooffprocentries;
4258   mat->nooffprocentries = PETSC_TRUE;
4259   ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4260   ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4261   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4262   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4263   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4264   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4265   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4266   mat->nooffprocentries = nooffprocentries;
4267   PetscFunctionReturn(0);
4268 }
4269 
4270 /*@C
4271    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4272    (the default parallel PETSc format).  For good matrix assembly performance
4273    the user should preallocate the matrix storage by setting the parameters
4274    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4275    performance can be increased by more than a factor of 50.
4276 
4277    Collective
4278 
4279    Input Parameters:
4280 +  comm - MPI communicator
4281 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4282            This value should be the same as the local size used in creating the
4283            y vector for the matrix-vector product y = Ax.
4284 .  n - This value should be the same as the local size used in creating the
4285        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4286        calculated if N is given) For square matrices n is almost always m.
4287 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4288 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4289 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4290            (same value is used for all local rows)
4291 .  d_nnz - array containing the number of nonzeros in the various rows of the
4292            DIAGONAL portion of the local submatrix (possibly different for each row)
4293            or NULL, if d_nz is used to specify the nonzero structure.
4294            The size of this array is equal to the number of local rows, i.e 'm'.
4295 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4296            submatrix (same value is used for all local rows).
4297 -  o_nnz - array containing the number of nonzeros in the various rows of the
4298            OFF-DIAGONAL portion of the local submatrix (possibly different for
4299            each row) or NULL, if o_nz is used to specify the nonzero
4300            structure. The size of this array is equal to the number
4301            of local rows, i.e 'm'.
4302 
4303    Output Parameter:
4304 .  A - the matrix
4305 
4306    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4307    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4308    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4309 
4310    Notes:
4311    If the *_nnz parameter is given then the *_nz parameter is ignored
4312 
4313    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4314    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4315    storage requirements for this matrix.
4316 
4317    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4318    processor then it must be used on all processors that share the object for
4319    that argument.
4320 
4321    The user MUST specify either the local or global matrix dimensions
4322    (possibly both).
4323 
4324    The parallel matrix is partitioned across processors such that the
4325    first m0 rows belong to process 0, the next m1 rows belong to
4326    process 1, the next m2 rows belong to process 2 etc., where
4327    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4328    values corresponding to an [m x N] submatrix.
4329 
4330    The columns are logically partitioned with the n0 columns belonging
4331    to the 0th partition, the next n1 columns belonging to the next
4332    partition etc., where n0,n1,n2... are the input parameter 'n'.
4333 
4334    The DIAGONAL portion of the local submatrix on any given processor
4335    is the submatrix corresponding to the rows and columns m,n
4336    owned by the given processor, i.e. the diagonal matrix on
4337    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4338    etc. The remaining portion of the local submatrix [m x (N-n)]
4339    constitutes the OFF-DIAGONAL portion. The example below better
4340    illustrates this concept.
4341 
4342    For a square global matrix we define each processor's diagonal portion
4343    to be its local rows and the corresponding columns (a square submatrix);
4344    each processor's off-diagonal portion encompasses the remainder of the
4345    local matrix (a rectangular submatrix).
4346 
4347    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4348 
4349    When calling this routine with a single process communicator, a matrix of
4350    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4351    type of communicator, use the construction mechanism
4352 .vb
4353      MatCreate(...,&A);
4354      MatSetType(A,MATMPIAIJ);
4355      MatSetSizes(A, m,n,M,N);
4356      MatMPIAIJSetPreallocation(A,...);
4357 .ve
4360 
4361    By default, this format uses inodes (identical nodes) when possible.
4362    We search for consecutive rows with the same nonzero structure, thereby
4363    reusing matrix information to achieve increased efficiency.
4364 
4365    Options Database Keys:
4366 +  -mat_no_inode  - Do not use inodes
4367 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4368 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4369         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4370         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4371 
4372    Example usage:
4373 
4374    Consider the following 8x8 matrix with 34 non-zero values, that is
4375    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4376    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4377    as follows:
4378 
4379 .vb
4380             1  2  0  |  0  3  0  |  0  4
4381     Proc0   0  5  6  |  7  0  0  |  8  0
4382             9  0 10  | 11  0  0  | 12  0
4383     -------------------------------------
4384            13  0 14  | 15 16 17  |  0  0
4385     Proc1   0 18  0  | 19 20 21  |  0  0
4386             0  0  0  | 22 23  0  | 24  0
4387     -------------------------------------
4388     Proc2  25 26 27  |  0  0 28  | 29  0
4389            30  0  0  | 31 32 33  |  0 34
4390 .ve
4391 
4392    This can be represented as a collection of submatrices as
4393 
4394 .vb
4395       A B C
4396       D E F
4397       G H I
4398 .ve
4399 
4400    Where the submatrices A,B,C are owned by proc0, D,E,F are
4401    owned by proc1, G,H,I are owned by proc2.
4402 
4403    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4404    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4405    The 'M','N' parameters are 8,8, and have the same values on all procs.
4406 
4407    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4408    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4409    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4410    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4411    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4412    matrix and [DF] as another SeqAIJ matrix.
4413 
4414    When d_nz, o_nz parameters are specified, d_nz storage elements are
4415    allocated for every row of the local diagonal submatrix, and o_nz
4416    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4417    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4418    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4419    In this case, the values of d_nz,o_nz are
4420 .vb
4421      proc0 : dnz = 2, o_nz = 2
4422      proc1 : dnz = 3, o_nz = 2
4423      proc2 : dnz = 1, o_nz = 4
4424 .ve
4425    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4426    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4427    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4428    34 values.
4429 
4430    When d_nnz, o_nnz parameters are specified, the storage is specified
4431    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4432    In the above case the values for d_nnz,o_nnz are
4433 .vb
4434      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4435      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4436      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4437 .ve
4438    Here the space allocated is the sum of all the above values, i.e. 34, and
4439    hence pre-allocation is perfect.
4440 
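   As a sketch (illustrative only, not from a PETSc example; creating the 8x8 example matrix from proc0's point of view, with the d_nnz/o_nnz values listed above):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
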
4441    Level: intermediate
4442 
4443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4444           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4445 @*/
4446 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4447 {
4448   PetscErrorCode ierr;
4449   PetscMPIInt    size;
4450 
4451   PetscFunctionBegin;
4452   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4453   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4454   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4455   if (size > 1) {
4456     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4457     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4458   } else {
4459     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4460     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4461   }
4462   PetscFunctionReturn(0);
4463 }
4464 
4465 /*@C
4466   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4467 
4468   Not collective
4469 
4470   Input Parameter:
4471 . A - The MPIAIJ matrix
4472 
4473   Output Parameters:
4474 + Ad - The local diagonal block as a SeqAIJ matrix
4475 . Ao - The local off-diagonal block as a SeqAIJ matrix
4476 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4477 
4478   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4479   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4480   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4481   local column numbers to global column numbers in the original matrix.
4482 
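  A minimal access sketch (illustrative only):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);  /* Ad, Ao, colmap are internal to A; do not destroy or free them */
.ve
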
4483   Level: intermediate
4484 
4485 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4486 @*/
4487 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4488 {
4489   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4490   PetscBool      flg;
4491   PetscErrorCode ierr;
4492 
4493   PetscFunctionBegin;
4494   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4495   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4496   if (Ad)     *Ad     = a->A;
4497   if (Ao)     *Ao     = a->B;
4498   if (colmap) *colmap = a->garray;
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4503 {
4504   PetscErrorCode ierr;
4505   PetscInt       m,N,i,rstart,nnz,Ii;
4506   PetscInt       *indx;
4507   PetscScalar    *values;
4508   MatType        rootType;
4509 
4510   PetscFunctionBegin;
4511   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4512   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4513     PetscInt       *dnz,*onz,sum,bs,cbs;
4514 
4515     if (n == PETSC_DECIDE) {
4516       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4517     }
4518     /* Check sum(n) = N */
4519     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4520     PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4521 
4522     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4523     rstart -= m;
4524 
4525     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4526     for (i=0; i<m; i++) {
4527       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4528       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4529       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4530     }
4531 
4532     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4533     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4534     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4535     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4536     ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
4537     ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
4538     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4539     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4540     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4541     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4542   }
4543 
4544   /* numeric phase */
4545   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4546   for (i=0; i<m; i++) {
4547     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4548     Ii   = i + rstart;
4549     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4550     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4551   }
4552   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4553   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4558 {
4559   PetscErrorCode    ierr;
4560   PetscMPIInt       rank;
4561   PetscInt          m,N,i,rstart,nnz;
4562   size_t            len;
4563   const PetscInt    *indx;
4564   PetscViewer       out;
4565   char              *name;
4566   Mat               B;
4567   const PetscScalar *values;
4568 
4569   PetscFunctionBegin;
4570   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4571   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4572   /* Should this be the type of the diagonal block of A? */
4573   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4574   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4575   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4576   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4577   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4578   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4579   for (i=0; i<m; i++) {
4580     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4581     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4582     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4583   }
4584   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4585   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4586 
4587   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4588   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4589   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4590   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4591   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4592   ierr = PetscFree(name);CHKERRQ(ierr);
4593   ierr = MatView(B,out);CHKERRQ(ierr);
4594   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4595   ierr = MatDestroy(&B);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4600 {
4601   PetscErrorCode      ierr;
4602   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4603 
4604   PetscFunctionBegin;
4605   if (!merge) PetscFunctionReturn(0);
4606   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4607   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4608   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4609   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4610   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4611   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4612   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4613   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4614   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4615   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4616   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4617   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4618   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4619   ierr = PetscFree(merge);CHKERRQ(ierr);
4620   PetscFunctionReturn(0);
4621 }
4622 
4623 #include <../src/mat/utils/freespace.h>
4624 #include <petscbt.h>
4625 
4626 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4627 {
4628   PetscErrorCode      ierr;
4629   MPI_Comm            comm;
4630   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4631   PetscMPIInt         size,rank,taga,*len_s;
4632   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4633   PetscInt            proc,m;
4634   PetscInt            **buf_ri,**buf_rj;
4635   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4636   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4637   MPI_Request         *s_waits,*r_waits;
4638   MPI_Status          *status;
4639   const MatScalar     *aa,*a_a;
4640   MatScalar           **abuf_r,*ba_i;
4641   Mat_Merge_SeqsToMPI *merge;
4642   PetscContainer      container;
4643 
4644   PetscFunctionBegin;
4645   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4646   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4647 
4648   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4649   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4650 
4651   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4652   PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4653   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4654   ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4655   aa   = a_a;
4656 
4657   bi     = merge->bi;
4658   bj     = merge->bj;
4659   buf_ri = merge->buf_ri;
4660   buf_rj = merge->buf_rj;
4661 
4662   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4663   owners = merge->rowmap->range;
4664   len_s  = merge->len_s;
4665 
4666   /* send and recv matrix values */
4667   /*-----------------------------*/
4668   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4669   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4670 
4671   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4672   for (proc=0,k=0; proc<size; proc++) {
4673     if (!len_s[proc]) continue;
4674     i    = owners[proc];
4675     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4676     k++;
4677   }
4678 
4679   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4680   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4681   ierr = PetscFree(status);CHKERRQ(ierr);
4682 
4683   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4684   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4685 
4686   /* insert mat values of mpimat */
4687   /*----------------------------*/
4688   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4689   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4690 
4691   for (k=0; k<merge->nrecv; k++) {
4692     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4693     nrows       = *(buf_ri_k[k]);
4694     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4695     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4696   }
4697 
4698   /* set values of ba */
4699   m    = merge->rowmap->n;
4700   for (i=0; i<m; i++) {
4701     arow = owners[rank] + i;
4702     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4703     bnzi = bi[i+1] - bi[i];
4704     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4705 
4706     /* add local non-zero vals of this proc's seqmat into ba */
4707     anzi   = ai[arow+1] - ai[arow];
4708     aj     = a->j + ai[arow];
4709     aa     = a_a + ai[arow];
4710     nextaj = 0;
4711     for (j=0; nextaj<anzi; j++) {
4712       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4713         ba_i[j] += aa[nextaj++];
4714       }
4715     }
4716 
4717     /* add received vals into ba */
4718     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4719       /* i-th row */
4720       if (i == *nextrow[k]) {
4721         anzi   = *(nextai[k]+1) - *nextai[k];
4722         aj     = buf_rj[k] + *(nextai[k]);
4723         aa     = abuf_r[k] + *(nextai[k]);
4724         nextaj = 0;
4725         for (j=0; nextaj<anzi; j++) {
4726           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4727             ba_i[j] += aa[nextaj++];
4728           }
4729         }
4730         nextrow[k]++; nextai[k]++;
4731       }
4732     }
4733     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4734   }
4735   ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4736   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4737   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4738 
4739   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4740   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4741   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4742   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4743   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4744   PetscFunctionReturn(0);
4745 }
4746 
4747 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4748 {
4749   PetscErrorCode      ierr;
4750   Mat                 B_mpi;
4751   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4752   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4753   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4754   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4755   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4756   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4757   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4758   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4759   MPI_Status          *status;
4760   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4761   PetscBT             lnkbt;
4762   Mat_Merge_SeqsToMPI *merge;
4763   PetscContainer      container;
4764 
4765   PetscFunctionBegin;
4766   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4767 
4768   /* make sure it is a PETSc comm */
4769   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4770   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4771   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4772 
4773   ierr = PetscNew(&merge);CHKERRQ(ierr);
4774   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4775 
4776   /* determine row ownership */
4777   /*---------------------------------------------------------*/
4778   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4779   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4780   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4781   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4782   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4783   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4784   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4785 
4786   m      = merge->rowmap->n;
4787   owners = merge->rowmap->range;
4788 
4789   /* determine the number of messages to send, their lengths */
4790   /*---------------------------------------------------------*/
4791   len_s = merge->len_s;
4792 
4793   len          = 0; /* length of buf_si[] */
4794   merge->nsend = 0;
4795   for (proc=0; proc<size; proc++) {
4796     len_si[proc] = 0;
4797     if (proc == rank) {
4798       len_s[proc] = 0;
4799     } else {
4800       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4801       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4802     }
4803     if (len_s[proc]) {
4804       merge->nsend++;
4805       nrows = 0;
4806       for (i=owners[proc]; i<owners[proc+1]; i++) {
4807         if (ai[i+1] > ai[i]) nrows++;
4808       }
4809       len_si[proc] = 2*(nrows+1);
4810       len         += len_si[proc];
4811     }
4812   }
4813 
4814   /* determine the number and length of messages to receive for ij-structure */
4815   /*-------------------------------------------------------------------------*/
4816   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4817   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4818 
4819   /* post the Irecv of j-structure */
4820   /*-------------------------------*/
4821   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4822   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4823 
4824   /* post the Isend of j-structure */
4825   /*--------------------------------*/
4826   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4827 
4828   for (proc=0, k=0; proc<size; proc++) {
4829     if (!len_s[proc]) continue;
4830     i    = owners[proc];
4831     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4832     k++;
4833   }
4834 
4835   /* receives and sends of j-structure are complete */
4836   /*------------------------------------------------*/
4837   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4838   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4839 
4840   /* send and recv i-structure */
4841   /*---------------------------*/
4842   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4843   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4844 
4845   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4846   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4847   for (proc=0,k=0; proc<size; proc++) {
4848     if (!len_s[proc]) continue;
4849     /* form outgoing message for i-structure:
4850          buf_si[0]:                 nrows to be sent
4851                [1:nrows]:           row index (global)
4852                [nrows+1:2*nrows+1]: i-structure index
4853     */
4854     /*-------------------------------------------*/
4855     nrows       = len_si[proc]/2 - 1;
4856     buf_si_i    = buf_si + nrows+1;
4857     buf_si[0]   = nrows;
4858     buf_si_i[0] = 0;
4859     nrows       = 0;
4860     for (i=owners[proc]; i<owners[proc+1]; i++) {
4861       anzi = ai[i+1] - ai[i];
4862       if (anzi) {
4863         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4864         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4865         nrows++;
4866       }
4867     }
4868     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4869     k++;
4870     buf_si += len_si[proc];
4871   }
4872 
4873   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4874   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4875 
4876   ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4877   for (i=0; i<merge->nrecv; i++) {
4878     ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4879   }
4880 
4881   ierr = PetscFree(len_si);CHKERRQ(ierr);
4882   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4883   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4884   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4885   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4886   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4887   ierr = PetscFree(status);CHKERRQ(ierr);
4888 
4889   /* compute a local seq matrix in each processor */
4890   /*----------------------------------------------*/
4891   /* allocate bi array and free space for accumulating nonzero column info */
4892   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4893   bi[0] = 0;
4894 
4895   /* create and initialize a linked list */
4896   nlnk = N+1;
4897   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4898 
4899   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4900   len  = ai[owners[rank+1]] - ai[owners[rank]];
4901   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4902 
4903   current_space = free_space;
4904 
4905   /* determine symbolic info for each local row */
4906   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4907 
4908   for (k=0; k<merge->nrecv; k++) {
4909     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4910     nrows       = *buf_ri_k[k];
4911     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4912     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4913   }
4914 
4915   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4916   len  = 0;
4917   for (i=0; i<m; i++) {
4918     bnzi = 0;
4919     /* add local non-zero cols of this proc's seqmat into lnk */
4920     arow  = owners[rank] + i;
4921     anzi  = ai[arow+1] - ai[arow];
4922     aj    = a->j + ai[arow];
4923     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4924     bnzi += nlnk;
4925     /* add received col data into lnk */
4926     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4927       if (i == *nextrow[k]) { /* i-th row */
4928         anzi  = *(nextai[k]+1) - *nextai[k];
4929         aj    = buf_rj[k] + *nextai[k];
4930         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4931         bnzi += nlnk;
4932         nextrow[k]++; nextai[k]++;
4933       }
4934     }
4935     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4936 
4937     /* if free space is not available, make more free space */
4938     if (current_space->local_remaining<bnzi) {
4939       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4940       nspacedouble++;
4941     }
4942     /* copy data into free space, then initialize lnk */
4943     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4944     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4945 
4946     current_space->array           += bnzi;
4947     current_space->local_used      += bnzi;
4948     current_space->local_remaining -= bnzi;
4949 
4950     bi[i+1] = bi[i] + bnzi;
4951   }
4952 
4953   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4954 
4955   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4956   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4957   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4958 
4959   /* create symbolic parallel matrix B_mpi */
4960   /*---------------------------------------*/
4961   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4962   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4963   if (n==PETSC_DECIDE) {
4964     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4965   } else {
4966     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4967   }
4968   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4969   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4970   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4971   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4972   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4973 
4974   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4975   B_mpi->assembled  = PETSC_FALSE;
4976   merge->bi         = bi;
4977   merge->bj         = bj;
4978   merge->buf_ri     = buf_ri;
4979   merge->buf_rj     = buf_rj;
4980   merge->coi        = NULL;
4981   merge->coj        = NULL;
4982   merge->owners_co  = NULL;
4983 
4984   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4985 
4986   /* attach the supporting struct to B_mpi for reuse */
4987   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4988   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4989   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4990   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4991   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4992   *mpimat = B_mpi;
4993 
4994   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4995   PetscFunctionReturn(0);
4996 }
4997 
4998 /*@C
4999       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5000                  matrices from each processor
5001 
5002     Collective
5003 
5004    Input Parameters:
5005 +    comm - the communicator the parallel matrix will live on
5006 .    seqmat - the input sequential matrix on each process
5007 .    m - number of local rows (or PETSC_DECIDE)
5008 .    n - number of local columns (or PETSC_DECIDE)
5009 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 
5011    Output Parameter:
5012 .    mpimat - the parallel matrix generated
5013 
5014     Level: advanced
5015 
5016    Notes:
5017      The dimensions of the sequential matrix on each process MUST be the same.
5018      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
5019      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5020 @*/
5021 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5022 {
5023   PetscErrorCode ierr;
5024   PetscMPIInt    size;
5025 
5026   PetscFunctionBegin;
5027   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5028   if (size == 1) {
5029     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5030     if (scall == MAT_INITIAL_MATRIX) {
5031       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5032     } else {
5033       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5034     }
5035     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5036     PetscFunctionReturn(0);
5037   }
5038   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039   if (scall == MAT_INITIAL_MATRIX) {
5040     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5041   }
5042   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5043   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5044   PetscFunctionReturn(0);
5045 }
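/*
   Illustrative usage sketch (not part of the library): each rank builds its own sequential AIJ piece and the
   pieces are summed into one parallel matrix. The sizes M, N, nz and the reuse pass below are assumptions for
   the example only; in the MAT_REUSE_MATRIX pass the sparsity pattern of seqmat must not change.

     Mat seqmat,mpimat;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,nz,NULL,&seqmat);CHKERRQ(ierr);
     ... set values in and assemble seqmat on every rank ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... later, after updating the values (same pattern) in seqmat ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
     ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/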
5046 
5047 /*@
5048      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5049           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5050           with MatGetSize()
5051 
5052     Not Collective
5053 
5054    Input Parameters:
5055 +    A - the matrix
5056 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5057 
5058    Output Parameter:
5059 .    A_loc - the local sequential matrix generated
5060 
5061     Level: developer
5062 
5063    Notes:
5064      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5065      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5066      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5067      modify the values of the returned A_loc.
5068 
5069 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5070 @*/
5071 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5072 {
5073   PetscErrorCode    ierr;
5074   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5075   Mat_SeqAIJ        *mat,*a,*b;
5076   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5077   const PetscScalar *aa,*ba,*aav,*bav;
5078   PetscScalar       *ca,*cam;
5079   PetscMPIInt       size;
5080   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5081   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5082   PetscBool         match;
5083 
5084   PetscFunctionBegin;
5085   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5086   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5087   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5088   if (size == 1) {
5089     if (scall == MAT_INITIAL_MATRIX) {
5090       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5091       *A_loc = mpimat->A;
5092     } else if (scall == MAT_REUSE_MATRIX) {
5093       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5094     }
5095     PetscFunctionReturn(0);
5096   }
5097 
5098   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5099   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5100   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5101   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5102   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5103   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5104   aa   = aav;
5105   ba   = bav;
5106   if (scall == MAT_INITIAL_MATRIX) {
5107     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5108     ci[0] = 0;
5109     for (i=0; i<am; i++) {
5110       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5111     }
5112     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5113     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5114     k    = 0;
5115     for (i=0; i<am; i++) {
5116       ncols_o = bi[i+1] - bi[i];
5117       ncols_d = ai[i+1] - ai[i];
5118       /* off-diagonal portion of A */
5119       for (jo=0; jo<ncols_o; jo++) {
5120         col = cmap[*bj];
5121         if (col >= cstart) break;
5122         cj[k]   = col; bj++;
5123         ca[k++] = *ba++;
5124       }
5125       /* diagonal portion of A */
5126       for (j=0; j<ncols_d; j++) {
5127         cj[k]   = cstart + *aj++;
5128         ca[k++] = *aa++;
5129       }
5130       /* off-diagonal portion of A */
5131       for (j=jo; j<ncols_o; j++) {
5132         cj[k]   = cmap[*bj++];
5133         ca[k++] = *ba++;
5134       }
5135     }
5136     /* put together the new matrix */
5137     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5138     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5139     /* Since these are PETSc arrays, change flags to free them as necessary. */
5140     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5141     mat->free_a  = PETSC_TRUE;
5142     mat->free_ij = PETSC_TRUE;
5143     mat->nonew   = 0;
5144   } else if (scall == MAT_REUSE_MATRIX) {
5145     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5146     ci   = mat->i;
5147     cj   = mat->j;
5148     ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5149     for (i=0; i<am; i++) {
5150       /* off-diagonal portion of A */
5151       ncols_o = bi[i+1] - bi[i];
5152       for (jo=0; jo<ncols_o; jo++) {
5153         col = cmap[*bj];
5154         if (col >= cstart) break;
5155         *cam++ = *ba++; bj++;
5156       }
5157       /* diagonal portion of A */
5158       ncols_d = ai[i+1] - ai[i];
5159       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5160       /* off-diagonal portion of A */
5161       for (j=jo; j<ncols_o; j++) {
5162         *cam++ = *ba++; bj++;
5163       }
5164     }
5165     ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5166   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5167   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5168   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5169   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5170   PetscFunctionReturn(0);
5171 }
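/*
   Illustrative usage sketch (not part of the library): extract the local rows of a parallel AIJ matrix as a
   sequential matrix, then reuse that sequential matrix after the parallel values change. A_loc is an example
   variable name, not a library object.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc, which is mlocal by N ...
     ... after modifying the values of A with the same nonzero pattern ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/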
5172 
5173 /*@
5174      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5175           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5176 
5177     Not Collective
5178 
5179    Input Parameters:
5180 +    A - the matrix
5181 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5182 
5183    Output Parameters:
5184 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5185 -    A_loc - the local sequential matrix generated
5186 
5187     Level: developer
5188 
5189    Notes:
5190      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5191 
5192 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5193 
5194 @*/
5195 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5196 {
5197   PetscErrorCode ierr;
5198   Mat            Ao,Ad;
5199   const PetscInt *cmap;
5200   PetscMPIInt    size;
5201   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5202 
5203   PetscFunctionBegin;
5204   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5205   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5206   if (size == 1) {
5207     if (scall == MAT_INITIAL_MATRIX) {
5208       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5209       *A_loc = Ad;
5210     } else if (scall == MAT_REUSE_MATRIX) {
5211       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5212     }
5213     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5214     PetscFunctionReturn(0);
5215   }
5216   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5217   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5218   if (f) {
5219     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5220   } else {
5221     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5222     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5223     Mat_SeqAIJ        *c;
5224     PetscInt          *ai = a->i, *aj = a->j;
5225     PetscInt          *bi = b->i, *bj = b->j;
5226     PetscInt          *ci,*cj;
5227     const PetscScalar *aa,*ba;
5228     PetscScalar       *ca;
5229     PetscInt          i,j,am,dn,on;
5230 
5231     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5232     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5233     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5234     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5235     if (scall == MAT_INITIAL_MATRIX) {
5236       PetscInt k;
5237       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5238       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5239       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5240       ci[0] = 0;
5241       for (i=0,k=0; i<am; i++) {
5242         const PetscInt ncols_o = bi[i+1] - bi[i];
5243         const PetscInt ncols_d = ai[i+1] - ai[i];
5244         ci[i+1] = ci[i] + ncols_o + ncols_d;
5245         /* diagonal portion of A */
5246         for (j=0; j<ncols_d; j++,k++) {
5247           cj[k] = *aj++;
5248           ca[k] = *aa++;
5249         }
5250         /* off-diagonal portion of A */
5251         for (j=0; j<ncols_o; j++,k++) {
5252           cj[k] = dn + *bj++;
5253           ca[k] = *ba++;
5254         }
5255       }
5256       /* put together the new matrix */
5257       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5258       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5259       /* Since these are PETSc arrays, change flags to free them as necessary. */
5260       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5261       c->free_a  = PETSC_TRUE;
5262       c->free_ij = PETSC_TRUE;
5263       c->nonew   = 0;
5264       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5265     } else if (scall == MAT_REUSE_MATRIX) {
5266       ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5267       for (i=0; i<am; i++) {
5268         const PetscInt ncols_d = ai[i+1] - ai[i];
5269         const PetscInt ncols_o = bi[i+1] - bi[i];
5270         /* diagonal portion of A */
5271         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5272         /* off-diagonal portion of A */
5273         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5274       }
5275       ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5276     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5277     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5278     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5279     if (glob) {
5280       PetscInt cst, *gidx;
5281 
5282       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5283       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5284       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5285       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5286       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5287     }
5288   }
5289   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5290   PetscFunctionReturn(0);
5291 }
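/*
   Illustrative usage sketch (not part of the library): the merged local matrix numbers the diagonal-block
   columns first and the off-diagonal columns afterwards; glob maps local columns of A_loc back to global
   column indices. A_loc and gidx are example variable names.

     Mat            A_loc;
     IS             glob;
     const PetscInt *gidx;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
     ierr = ISGetIndices(glob,&gidx);CHKERRQ(ierr);
     ... gidx[c] is the global column corresponding to local column c of A_loc ...
     ierr = ISRestoreIndices(glob,&gidx);CHKERRQ(ierr);
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/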
5292 
5293 /*@C
5294      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5295 
5296     Not Collective
5297 
5298    Input Parameters:
5299 +    A - the matrix
5300 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5301 -    row, col - index sets of rows and columns to extract (or NULL)
5302 
5303    Output Parameter:
5304 .    A_loc - the local sequential matrix generated
5305 
5306     Level: developer
5307 
5308 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5309 
5310 @*/
5311 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5312 {
5313   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5314   PetscErrorCode ierr;
5315   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5316   IS             isrowa,iscola;
5317   Mat            *aloc;
5318   PetscBool      match;
5319 
5320   PetscFunctionBegin;
5321   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5322   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5323   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5324   if (!row) {
5325     start = A->rmap->rstart; end = A->rmap->rend;
5326     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5327   } else {
5328     isrowa = *row;
5329   }
5330   if (!col) {
5331     start = A->cmap->rstart;
5332     cmap  = a->garray;
5333     nzA   = a->A->cmap->n;
5334     nzB   = a->B->cmap->n;
5335     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5336     ncols = 0;
5337     for (i=0; i<nzB; i++) {
5338       if (cmap[i] < start) idx[ncols++] = cmap[i];
5339       else break;
5340     }
5341     imark = i;
5342     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5343     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5344     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5345   } else {
5346     iscola = *col;
5347   }
5348   if (scall != MAT_INITIAL_MATRIX) {
5349     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5350     aloc[0] = *A_loc;
5351   }
5352   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5353   if (!col) { /* attach global id of condensed columns */
5354     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5355   }
5356   *A_loc = aloc[0];
5357   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5358   if (!row) {
5359     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5360   }
5361   if (!col) {
5362     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5363   }
5364   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5365   PetscFunctionReturn(0);
5366 }
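/*
   Illustrative usage sketch (not part of the library): passing NULL for the row and column index sets lets the
   routine use all local rows and only the columns that actually contain nonzeros on this process.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     ... work with the condensed local matrix ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/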
5367 
5368 /*
5369  * Create a sequential AIJ matrix based on row indices; once a row is matched, all of its columns are extracted.
5370  * A row could be local or remote. The routine is designed to be memory scalable, so that nothing is sized
5371  * based on a global dimension.
5372  * */
5373 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5374 {
5375   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5376   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5377   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5378   PetscMPIInt              owner;
5379   PetscSFNode              *iremote,*oiremote;
5380   const PetscInt           *lrowindices;
5381   PetscErrorCode           ierr;
5382   PetscSF                  sf,osf;
5383   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5384   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5385   MPI_Comm                 comm;
5386   ISLocalToGlobalMapping   mapping;
5387   const PetscScalar        *pd_a,*po_a;
5388 
5389   PetscFunctionBegin;
5390   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5391   /* plocalsize is the number of roots
5392    * nrows is the number of leaves
5393    * */
5394   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5395   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5396   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5397   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5398   for (i=0;i<nrows;i++) {
5399     /* Find a remote index and an owner for a row
5400      * The row could be local or remote
5401      * */
5402     owner = 0;
5403     lidx  = 0;
5404     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5405     iremote[i].index = lidx;
5406     iremote[i].rank  = owner;
5407   }
5408   /* Create SF to communicate how many nonzero columns for each row */
5409   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5410   /* SF will figure out the number of nonzero columns for each row, and their
5411    * offsets
5412    * */
5413   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5414   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5415   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5416 
5417   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5418   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5419   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5420   roffsets[0] = 0;
5421   roffsets[1] = 0;
5422   for (i=0;i<plocalsize;i++) {
5423     /* diag */
5424     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5425     /* off diag */
5426     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5427     /* compute offsets so that we know the relative location of each row */
5428     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5429     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5430   }
5431   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5432   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5433   /* 'r' means root, and 'l' means leaf */
5434   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5435   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5436   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5437   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5438   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5439   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5440   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5441   dntotalcols = 0;
5442   ontotalcols = 0;
5443   ncol = 0;
5444   for (i=0;i<nrows;i++) {
5445     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5446     ncol = PetscMax(pnnz[i],ncol);
5447     /* diag */
5448     dntotalcols += nlcols[i*2+0];
5449     /* off diag */
5450     ontotalcols += nlcols[i*2+1];
5451   }
5452   /* We do not need to figure out the exact number of columns
5453    * since all the calculations will be done by going through the raw data
5454    * */
5455   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5456   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5457   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5458   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5459   /* diag */
5460   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5461   /* off diag */
5462   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5463   /* diag */
5464   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5465   /* off diag */
5466   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5467   dntotalcols = 0;
5468   ontotalcols = 0;
5469   ntotalcols  = 0;
5470   for (i=0;i<nrows;i++) {
5471     owner = 0;
5472     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5473     /* Set iremote for diag matrix */
5474     for (j=0;j<nlcols[i*2+0];j++) {
5475       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5476       iremote[dntotalcols].rank    = owner;
5477       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5478       ilocal[dntotalcols++]        = ntotalcols++;
5479     }
5480     /* off diag */
5481     for (j=0;j<nlcols[i*2+1];j++) {
5482       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5483       oiremote[ontotalcols].rank    = owner;
5484       oilocal[ontotalcols++]        = ntotalcols++;
5485     }
5486   }
5487   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5488   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5489   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5490   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5491   /* P serves as the roots and P_oth as the leaves
5492    * Diag matrix
5493    * */
5494   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5495   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5496   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5497 
5498   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5499   /* Off diag */
5500   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5501   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5502   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5503   ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5504   ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5505   /* We operate on the matrix internal data to save memory */
5506   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5507   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5508   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5509   /* Convert to global indices for diag matrix */
5510   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5511   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5512   /* We want P_oth to store global indices */
5513   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5514   /* Use memory scalable approach */
5515   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5516   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5517   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5518   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5519   /* Convert back to local indices */
5520   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5521   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5522   nout = 0;
5523   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5524   PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5525   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5526   /* Exchange values */
5527   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5528   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5529   ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5530   ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5531   /* Stop PETSc from shrinking memory */
5532   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5533   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5534   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5535   /* Attach PetscSF objects to P_oth so that we can reuse them later */
5536   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5537   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5538   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5539   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5540   PetscFunctionReturn(0);
5541 }
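/*
   Minimal sketch (with stated assumptions) of the PetscSF pattern used above: roots live on the owning ranks,
   leaves are the requested copies, and a broadcast moves root data to the leaves. The tiny graph below, with
   two leaves both referencing root 0 of rank 0, is made up purely for illustration; comm is assumed to be a
   valid communicator.

     PetscSF     sf;
     PetscSFNode remote[2];
     PetscInt    rootdata[1],leafdata[2];
     rootdata[0]     = 42;
     remote[0].rank  = 0; remote[0].index = 0;
     remote[1].rank  = 0; remote[1].index = 0;
     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,1,2,NULL,PETSC_OWN_POINTER,remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
     ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/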
5542 
5543 /*
5544  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5545  * This supports MPIAIJ and MAIJ
5546  * */
5547 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5548 {
5549   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5550   Mat_SeqAIJ            *p_oth;
5551   IS                    rows,map;
5552   PetscHMapI            hamp;
5553   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5554   MPI_Comm              comm;
5555   PetscSF               sf,osf;
5556   PetscBool             has;
5557   PetscErrorCode        ierr;
5558 
5559   PetscFunctionBegin;
5560   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5561   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5562   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5563    *  and then create a submatrix (that often is an overlapping matrix)
5564    * */
5565   if (reuse == MAT_INITIAL_MATRIX) {
5566     /* Use a hash table to figure out unique keys */
5567     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5568     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5569     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5570     count = 0;
5571     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5572     for (i=0;i<a->B->cmap->n;i++) {
5573       key  = a->garray[i]/dof;
5574       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5575       if (!has) {
5576         mapping[i] = count;
5577         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5578       } else {
5579         /* Current 'i' has the same key as in the previous step */
5580         mapping[i] = count-1;
5581       }
5582     }
5583     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5584     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5585     PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5586     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5587     off = 0;
5588     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5589     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5590     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5591     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5592     /* In case the matrix was already created but the user wants to recreate it */
5593     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5594     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5595     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5596     ierr = ISDestroy(&map);CHKERRQ(ierr);
5597     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5598   } else if (reuse == MAT_REUSE_MATRIX) {
5599     /* If the matrix was already created, we simply update values using the SF objects
5600      * that were attached to the matrix earlier.
5601      */
5602     const PetscScalar *pd_a,*po_a;
5603 
5604     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5605     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5606     PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5607     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5608     /* Update values in place */
5609     ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5610     ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5611     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5612     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5613     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5614     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5615     ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5616     ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5617   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5618   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5619   PetscFunctionReturn(0);
5620 }
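/*
   Illustrative usage sketch (not part of the library): gather the rows of P referenced by the off-diagonal
   columns of A, first with MAT_INITIAL_MATRIX and then refreshing values in place. dof = 1 corresponds to
   plain MPIAIJ; a MAIJ matrix would pass its number of components instead.

     Mat P_oth = NULL;
     ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_INITIAL_MATRIX,&P_oth);CHKERRQ(ierr);
     ... use P_oth, then update the values of P ...
     ierr = MatGetBrowsOfAcols_MPIXAIJ(A,P,1,MAT_REUSE_MATRIX,&P_oth);CHKERRQ(ierr);
     ierr = MatDestroy(&P_oth);CHKERRQ(ierr);
*/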
5621 
5622 /*@C
5623     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5624 
5625     Collective on Mat
5626 
5627    Input Parameters:
5628 +    A - the first matrix in mpiaij format
5629 .    B - the second matrix in mpiaij format
5630 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5631 
5632    Input/Output Parameters:
5633 +    rowb - index set of rows of B to extract (or NULL), modified on output
5634 -    colb - index set of columns of B to extract (or NULL), modified on output
5635 
5636    Output Parameter:
5637 .    B_seq - the sequential matrix generated
5638 
5639     Level: developer
5640 
5641 @*/
5642 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5643 {
5644   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5645   PetscErrorCode ierr;
5646   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5647   IS             isrowb,iscolb;
5648   Mat            *bseq=NULL;
5649 
5650   PetscFunctionBegin;
5651   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5652     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5653   }
5654   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5655 
5656   if (scall == MAT_INITIAL_MATRIX) {
5657     start = A->cmap->rstart;
5658     cmap  = a->garray;
5659     nzA   = a->A->cmap->n;
5660     nzB   = a->B->cmap->n;
5661     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5662     ncols = 0;
5663     for (i=0; i<nzB; i++) {  /* row < local row index */
5664       if (cmap[i] < start) idx[ncols++] = cmap[i];
5665       else break;
5666     }
5667     imark = i;
5668     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5669     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5670     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5671     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5672   } else {
5673     PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5674     isrowb  = *rowb; iscolb = *colb;
5675     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5676     bseq[0] = *B_seq;
5677   }
5678   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5679   *B_seq = bseq[0];
5680   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5681   if (!rowb) {
5682     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5683   } else {
5684     *rowb = isrowb;
5685   }
5686   if (!colb) {
5687     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5688   } else {
5689     *colb = iscolb;
5690   }
5691   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5692   PetscFunctionReturn(0);
5693 }
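/*
   Illustrative usage sketch (not part of the library): rowb and colb start as NULL so the routine builds them,
   and they are then passed back for a MAT_REUSE_MATRIX call once the values of B have changed.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... update the values of B ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/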
5694 
5695 /*
5696     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5697     of the OFF-DIAGONAL portion of local A
5698 
5699     Collective on Mat
5700 
5701    Input Parameters:
5702 +    A,B - the matrices in mpiaij format
5703 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5704 
5705    Output Parameters:
5706 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5707 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5708 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5709 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5710 
5711     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5712      for this matrix. This is not desirable.
5713 
5714     Level: developer
5715 
5716 */
5717 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5718 {
5719   PetscErrorCode         ierr;
5720   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5721   Mat_SeqAIJ             *b_oth;
5722   VecScatter             ctx;
5723   MPI_Comm               comm;
5724   const PetscMPIInt      *rprocs,*sprocs;
5725   const PetscInt         *srow,*rstarts,*sstarts;
5726   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5727   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5728   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5729   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5730   PetscMPIInt            size,tag,rank,nreqs;
5731 
5732   PetscFunctionBegin;
5733   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5734   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5735 
5736   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5737     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5738   }
5739   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5740   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5741 
5742   if (size == 1) {
5743     startsj_s = NULL;
5744     bufa_ptr  = NULL;
5745     *B_oth    = NULL;
5746     PetscFunctionReturn(0);
5747   }
5748 
5749   ctx = a->Mvctx;
5750   tag = ((PetscObject)ctx)->tag;
5751 
5752   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5753   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5754   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5755   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5756   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5757   rwaits = reqs;
5758   swaits = reqs + nrecvs;
5759 
5760   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5761   if (scall == MAT_INITIAL_MATRIX) {
5762     /* i-array */
5763     /*---------*/
5764     /*  post receives */
5765     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5766     for (i=0; i<nrecvs; i++) {
5767       rowlen = rvalues + rstarts[i]*rbs;
5768       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5769       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5770     }
5771 
5772     /* pack the outgoing message */
5773     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5774 
5775     sstartsj[0] = 0;
5776     rstartsj[0] = 0;
5777     len         = 0; /* total length of j or a array to be sent */
5778     if (nsends) {
5779       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5780       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5781     }
5782     for (i=0; i<nsends; i++) {
5783       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5784       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5785       for (j=0; j<nrows; j++) {
5786         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5787         for (l=0; l<sbs; l++) {
5788           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5789 
5790           rowlen[j*sbs+l] = ncols;
5791 
5792           len += ncols;
5793           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5794         }
5795         k++;
5796       }
5797       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5798 
5799       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5800     }
5801     /* recvs and sends of i-array are completed */
5802     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5803     ierr = PetscFree(svalues);CHKERRQ(ierr);
5804 
5805     /* allocate buffers for sending j and a arrays */
5806     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5807     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5808 
5809     /* create i-array of B_oth */
5810     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5811 
5812     b_othi[0] = 0;
5813     len       = 0; /* total length of j or a array to be received */
5814     k         = 0;
5815     for (i=0; i<nrecvs; i++) {
5816       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5817       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5818       for (j=0; j<nrows; j++) {
5819         b_othi[k+1] = b_othi[k] + rowlen[j];
5820         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5821         k++;
5822       }
5823       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5824     }
5825     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5826 
5827     /* allocate space for j and a arrays of B_oth */
5828     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5829     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5830 
5831     /* j-array */
5832     /*---------*/
5833     /*  post receives of j-array */
5834     for (i=0; i<nrecvs; i++) {
5835       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5836       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5837     }
5838 
5839     /* pack the outgoing message j-array */
5840     if (nsends) k = sstarts[0];
5841     for (i=0; i<nsends; i++) {
5842       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5843       bufJ  = bufj+sstartsj[i];
5844       for (j=0; j<nrows; j++) {
5845         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5846         for (ll=0; ll<sbs; ll++) {
5847           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5848           for (l=0; l<ncols; l++) {
5849             *bufJ++ = cols[l];
5850           }
5851           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5852         }
5853       }
5854       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5855     }
5856 
5857     /* recvs and sends of j-array are completed */
5858     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5859   } else if (scall == MAT_REUSE_MATRIX) {
5860     sstartsj = *startsj_s;
5861     rstartsj = *startsj_r;
5862     bufa     = *bufa_ptr;
5863     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5864     ierr     = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5865   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5866 
5867   /* a-array */
5868   /*---------*/
5869   /*  post receives of a-array */
5870   for (i=0; i<nrecvs; i++) {
5871     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5872     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5873   }
5874 
5875   /* pack the outgoing message a-array */
5876   if (nsends) k = sstarts[0];
5877   for (i=0; i<nsends; i++) {
5878     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5879     bufA  = bufa+sstartsj[i];
5880     for (j=0; j<nrows; j++) {
5881       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5882       for (ll=0; ll<sbs; ll++) {
5883         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5884         for (l=0; l<ncols; l++) {
5885           *bufA++ = vals[l];
5886         }
5887         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5888       }
5889     }
5890     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5891   }
5892   /* recvs and sends of a-array are completed */
5893   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5894   ierr = PetscFree(reqs);CHKERRQ(ierr);
5895 
5896   if (scall == MAT_INITIAL_MATRIX) {
5897     /* put together the new matrix */
5898     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5899 
5900     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5901     /* Since these are PETSc arrays, change flags to free them as necessary. */
5902     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5903     b_oth->free_a  = PETSC_TRUE;
5904     b_oth->free_ij = PETSC_TRUE;
5905     b_oth->nonew   = 0;
5906 
5907     ierr = PetscFree(bufj);CHKERRQ(ierr);
5908     if (!startsj_s || !bufa_ptr) {
5909       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5910       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5911     } else {
5912       *startsj_s = sstartsj;
5913       *startsj_r = rstartsj;
5914       *bufa_ptr  = bufa;
5915     }
5916   } else if (scall == MAT_REUSE_MATRIX) {
5917     ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5918   }
5919 
5920   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5921   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5922   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5923   PetscFunctionReturn(0);
5924 }
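/*
   Illustrative usage sketch (not part of the library) of the initial/reuse calling pattern: the j- and a-array
   offsets and the send buffer produced by the first call are handed back on reuse so that only the numerical
   values are re-communicated. Cleanup of the saved arrays with PetscFree2()/PetscFree() mirrors how they are
   allocated above and is shown as an assumption for the example.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... update the values of B ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/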
5925 
5926 /*@C
5927   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5928 
5929   Not Collective
5930 
5931   Input Parameter:
5932 . A - The matrix in mpiaij format
5933 
5934   Output Parameters:
5935 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5936 . colmap - A map from global column index to local index into lvec
5937 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5938 
5939   Level: developer
5940 
5941 @*/
5942 #if defined(PETSC_USE_CTABLE)
5943 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5944 #else
5945 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5946 #endif
5947 {
5948   Mat_MPIAIJ *a;
5949 
5950   PetscFunctionBegin;
5951   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5952   PetscValidPointer(lvec, 2);
5953   PetscValidPointer(colmap, 3);
5954   PetscValidPointer(multScatter, 4);
5955   a = (Mat_MPIAIJ*) A->data;
5956   if (lvec) *lvec = a->lvec;
5957   if (colmap) *colmap = a->colmap;
5958   if (multScatter) *multScatter = a->Mvctx;
5959   PetscFunctionReturn(0);
5960 }
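/*
   Illustrative usage sketch (not part of the library): peek at the communication objects PETSc uses for
   MatMult() with this matrix; colmap's type depends on PETSC_USE_CTABLE, so here only the PetscInt*
   configuration is shown. The objects remain owned by the matrix and must not be destroyed by the caller.

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/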
5961 
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5963 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5965 #if defined(PETSC_HAVE_MKL_SPARSE)
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5967 #endif
5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5970 #if defined(PETSC_HAVE_ELEMENTAL)
5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5972 #endif
5973 #if defined(PETSC_HAVE_SCALAPACK)
5974 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5975 #endif
5976 #if defined(PETSC_HAVE_HYPRE)
5977 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5978 #endif
5979 #if defined(PETSC_HAVE_CUDA)
5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5981 #endif
5982 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5984 #endif
5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5986 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5988 
5989 /*
5990     Computes (B'*A')' since computing A*B directly is untenable
5991 
5992                n                       p                          p
5993         [             ]       [             ]         [                 ]
5994       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5995         [             ]       [             ]         [                 ]
5996 
5997 */
5998 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5999 {
6000   PetscErrorCode ierr;
6001   Mat            At,Bt,Ct;
6002 
6003   PetscFunctionBegin;
6004   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
6005   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
6006   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
6007   ierr = MatDestroy(&At);CHKERRQ(ierr);
6008   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
6009   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
6010   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6011   PetscFunctionReturn(0);
6012 }
6013 
6014 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6015 {
6016   PetscErrorCode ierr;
6017   PetscBool      cisdense;
6018 
6019   PetscFunctionBegin;
6020   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
6021   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6022   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6023   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6024   if (!cisdense) {
6025     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6026   }
6027   ierr = MatSetUp(C);CHKERRQ(ierr);
6028 
6029   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6030   PetscFunctionReturn(0);
6031 }
6032 
6033 /* ----------------------------------------------------------------*/
6034 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6035 {
6036   Mat_Product *product = C->product;
6037   Mat         A = product->A,B=product->B;
6038 
6039   PetscFunctionBegin;
6040   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6041     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6042 
6043   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6044   C->ops->productsymbolic = MatProductSymbolic_AB;
6045   PetscFunctionReturn(0);
6046 }
6047 
6048 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6049 {
6050   PetscErrorCode ierr;
6051   Mat_Product    *product = C->product;
6052 
6053   PetscFunctionBegin;
6054   if (product->type == MATPRODUCT_AB) {
6055     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6056   }
6057   PetscFunctionReturn(0);
6058 }
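/*
   Illustrative usage sketch (not part of the library): a user-level dense-times-sparse product of the kind the
   symbolic/numeric routines above support. A_dense (MPIDENSE) and B_aij (MPIAIJ) are hypothetical names, and
   that this particular call path is taken is an assumption stated only for orientation.

     Mat C;
     ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... update values of A_dense or B_aij, keeping their patterns ...
     ierr = MatMatMult(A_dense,B_aij,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/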
6059 
6060 /* std::upper_bound(): Given a sorted array, return the index of the first element in the range [first,last) whose value
6061    is greater than value, or last if there is no such element.
6062 */
6063 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
6064 {
6065   PetscCount  it,step,count = last - first;
6066 
6067   PetscFunctionBegin;
6068   while (count > 0) {
6069     it   = first;
6070     step = count / 2;
6071     it  += step;
6072     if (!(value < array[it])) {
6073       first  = ++it;
6074       count -= step + 1;
6075     } else count = step;
6076   }
6077   *upper = first;
6078   PetscFunctionReturn(0);
6079 }
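/*
   Worked example (illustrative only) of the upper-bound search above; the array and values are made up.

     PetscInt   array[4] = {1,3,3,7};
     PetscCount upper;
     ierr = PetscSortedIntUpperBound(array,0,4,3,&upper);CHKERRQ(ierr);
     ... upper is now 3, the index of the first element greater than 3 ...
     ierr = PetscSortedIntUpperBound(array,0,4,9,&upper);CHKERRQ(ierr);
     ... upper is now 4 (== last) since no element is greater than 9 ...
*/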
6080 
6081 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
6082 
6083   Input Parameters:
6084 
6085     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6086     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6087 
6088     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
6089 
6090     For Set1, j1[] contains column indices of the nonzeros.
6091     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6092     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6093     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6094 
6095     Similar for Set2.
6096 
6097     This routine merges the two sets of nonzeros row by row and removes repeats.
6098 
6099   Output Parameters: (memories are allocated by the caller)
6100 
6101     i[],j[]: the CSR of the merged matrix, which has m rows.
6102     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6103     imap2[]: similar to imap1[], but for Set2.
6104     Note we order nonzeros row-by-row and from left to right.
6105 */
6106 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6107   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6108   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6109 {
6110   PetscErrorCode ierr;
6111   PetscInt       r,m; /* Row index of mat */
6112   PetscCount     t,t1,t2,b1,e1,b2,e2;
6113 
6114   PetscFunctionBegin;
6115   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
6116   t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix respectively */
6117   i[0] = 0;
6118   for (r=0; r<m; r++) { /* Do row by row merging */
6119     b1   = rowBegin1[r];
6120     e1   = rowEnd1[r];
6121     b2   = rowBegin2[r];
6122     e2   = rowEnd2[r];
6123     while (b1 < e1 && b2 < e2) {
6124       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6125         j[t]      = j1[b1];
6126         imap1[t1] = t;
6127         imap2[t2] = t;
6128         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6129         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6130         t1++; t2++; t++;
6131       } else if (j1[b1] < j2[b2]) {
6132         j[t]      = j1[b1];
6133         imap1[t1] = t;
6134         b1       += jmap1[t1+1] - jmap1[t1];
6135         t1++; t++;
6136       } else {
6137         j[t]      = j2[b2];
6138         imap2[t2] = t;
6139         b2       += jmap2[t2+1] - jmap2[t2];
6140         t2++; t++;
6141       }
6142     }
6143     /* Merge the remaining in either j1[] or j2[] */
6144     while (b1 < e1) {
6145       j[t]      = j1[b1];
6146       imap1[t1] = t;
6147       b1       += jmap1[t1+1] - jmap1[t1];
6148       t1++; t++;
6149     }
6150     while (b2 < e2) {
6151       j[t]      = j2[b2];
6152       imap2[t2] = t;
6153       b2       += jmap2[t2+1] - jmap2[t2];
6154       t2++; t++;
6155     }
6156     i[r+1] = t;
6157   }
6158   PetscFunctionReturn(0);
6159 }
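/*
   Worked example (illustrative only) of the row-by-row merge for a single row r, with made-up data:

     Set1: j1 = {2,5,5}, jmap1 = {0,1,3}   (column 2 appears once, column 5 twice)
     Set2: j2 = {5,5,9}, jmap2 = {0,2,3}   (column 5 appears twice, column 9 once)

   The merged row is j = {2,5,9}; imap1 = {0,1} says Set1's unique nonzeros become merged entries 0 and 1,
   and imap2 = {1,2} says Set2's unique nonzeros become merged entries 1 and 2.
*/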
6160 
6161 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6162 
6163   Input Parameters:
6164     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6165     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6166       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6167 
6168       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6169       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6170 
6171   Output Parameters:
6172     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6173     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6174       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6175       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6176 
6177     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6178       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, which counts
6179         repeats (i.e., entries with the same 'i,j' pair).
6180       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6181         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6182 
6183       Atot: number of entries belonging to the diagonal block
6184       Annz: number of unique nonzeros belonging to the diagonal block.
6185 
6186     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6187 
6188     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One must free them with a single PetscFree4() in exactly this order.
6189 */
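/* Worked example for the splitting described above (hypothetical, assuming local diagonal columns [cstart,cend) = [4,8)):
   Suppose one local row arrives with j[] = {9, 5, 5, 2, 9} (unsorted, with repeats). After the shift-and-sort below, the
   diagonal-block part {5, 5} precedes the off-diagonal part {2, 9, 9}, so for this row
     rowBegin/rowMid/rowEnd bracket 2 diag entries and 3 offdiag entries,
     Atot increases by 2, Annz by 1, and Ajmap records the repeat count 2 for column 5,
     Btot increases by 3, Bnnz by 2, and Bjmap records the repeat counts {1, 2} for columns 2 and 9.
*/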
6190 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6191   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6192   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6193   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6194 {
6195   PetscErrorCode    ierr;
6196   PetscInt          cstart,cend,rstart,rend,row,col;
6197   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6198   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6199   PetscCount        k,m,p,q,r,s,mid;
6200   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6201 
6202   PetscFunctionBegin;
6203   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6204   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6205   m    = rend - rstart;
6206 
6207   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6208 
6209   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6210      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6211   */
6212   while (k<n) {
6213     row = i[k];
6214     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6215     for (s=k; s<n; s++) if (i[s] != row) break;
6216     for (p=k; p<s; p++) {
6217       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6218      #if defined(PETSC_USE_DEBUG)
6219       else if (j[p] < 0 || j[p] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6220      #endif
6221     }
6222     ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr);
6223     ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6224     rowBegin[row-rstart] = k;
6225     rowMid[row-rstart]   = mid;
6226     rowEnd[row-rstart]   = s;
6227 
6228     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6229     Atot += mid - k;
6230     Btot += s - mid;
6231 
6232     /* Count unique nonzeros of this diag/offdiag row */
6233     for (p=k; p<mid;) {
6234       col = j[p];
6235       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6236       Annz++;
6237     }
6238 
6239     for (p=mid; p<s;) {
6240       col = j[p];
6241       do {p++;} while (p<s && j[p] == col);
6242       Bnnz++;
6243     }
6244     k = s;
6245   }
6246 
6247   /* Allocation according to Atot, Btot, Annz, Bnnz */
6248   ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr);
6249 
6250   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6251   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6252   for (r=0; r<m; r++) {
6253     k     = rowBegin[r];
6254     mid   = rowMid[r];
6255     s     = rowEnd[r];
6256     ierr  = PetscArraycpy(Aperm+Atot,perm+k,  mid-k);CHKERRQ(ierr);
6257     ierr  = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr);
6258     Atot += mid - k;
6259     Btot += s - mid;
6260 
6261     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6262     for (p=k; p<mid;) {
6263       col = j[p];
6264       q   = p;
6265       do {p++;} while (p<mid && j[p] == col);
6266       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6267       Annz++;
6268     }
6269 
6270     for (p=mid; p<s;) {
6271       col = j[p];
6272       q   = p;
6273       do {p++;} while (p<s && j[p] == col);
6274       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6275       Bnnz++;
6276     }
6277   }
6278   /* Output */
6279   *Aperm_ = Aperm;
6280   *Annz_  = Annz;
6281   *Atot_  = Atot;
6282   *Ajmap_ = Ajmap;
6283   *Bperm_ = Bperm;
6284   *Bnnz_  = Bnnz;
6285   *Btot_  = Btot;
6286   *Bjmap_ = Bjmap;
6287   PetscFunctionReturn(0);
6288 }
6289 
6290 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6291 {
6292   PetscErrorCode            ierr;
6293   MPI_Comm                  comm;
6294   PetscMPIInt               rank,size;
6295   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6296   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6297   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6298 
6299   PetscFunctionBegin;
6300   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
6301   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
6302   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
6303   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
6304   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
6305   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6306   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6307   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
6308   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
6309 
6310   /* ---------------------------------------------------------------------------*/
6311   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6312   /* entries come first, then local rows, then remote rows.                     */
6313   /* ---------------------------------------------------------------------------*/
6314   PetscCount n1 = coo_n,*perm1;
6315   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6316   ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr);
6317   ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */
6318   ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr);
6319   for (k=0; k<n1; k++) perm1[k] = k;
6320 
6321   /* Manipulate indices so that entries with negative row or col indices will have smallest
6322      row indices, local entries will have greater but negative row indices, and remote entries
6323      will have positive row indices.
6324   */
6325   for (k=0; k<n1; k++) {
6326     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6327     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6328     else if (mat->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but an attempt was made to insert into remote rows");
6329     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6330   }
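  /* For example (hypothetical values), with local rows [rstart,rend) = [10,20): an entry with i = -1 becomes PETSC_MIN_INT
     (ignored), a local entry with i = 12 becomes 12 - PETSC_MAX_INT (negative, but greater than PETSC_MIN_INT), and a remote
     entry with i = 25 keeps i = 25. A single sort by row index below then groups ignored, local and remote entries in that order. */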
6331 
6332   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6333   ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr);
6334   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6335   ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */
6336   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6337 
6338   /* ---------------------------------------------------------------------------*/
6339   /*           Split local rows into diag/offdiag portions                      */
6340   /* ---------------------------------------------------------------------------*/
6341   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6342   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6343   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6344 
6345   ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr);
6346   ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr);
6347   ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr);
6348 
6349   /* ---------------------------------------------------------------------------*/
6350   /*           Send remote rows to their owner                                  */
6351   /* ---------------------------------------------------------------------------*/
6352   /* Find which rows should be sent to which remote ranks*/
6353   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6354   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6355   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6356   const PetscInt *ranges;
6357   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6358 
6359   ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr);
6360   ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr);
6361   for (k=rem; k<n1;) {
6362     PetscMPIInt  owner;
6363     PetscInt     firstRow,lastRow;
6364     /* Locate a row range */
6365     firstRow = i1[k]; /* first row of this owner */
6366     ierr     = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr);
6367     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6368 
6369     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6370     ierr     = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr);
6371 
6372     /* All entries in [k,p) belong to this remote owner */
6373     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6374       PetscMPIInt *sendto2;
6375       PetscInt    *nentries2;
6376       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6377       ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr);
6378       ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr);
6379       ierr = PetscArraycpy(nentries2,nentries,maxNsend);CHKERRQ(ierr);
6380       ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6381       sendto      = sendto2;
6382       nentries    = nentries2;
6383       maxNsend    = maxNsend2;
6384     }
6385     sendto[nsend]   = owner;
6386     ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr); /* Error if p-k does not fit in PetscInt */
6388     nsend++;
6389     k = p;
6390   }
6391 
6392   /* Build 1st SF to know offsets on remote to send data */
6393   PetscSF     sf1;
6394   PetscInt    nroots = 1,nroots2 = 0;
6395   PetscInt    nleaves = nsend,nleaves2 = 0;
6396   PetscInt    *offsets;
6397   PetscSFNode *iremote;
6398 
6399   ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr);
6400   ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr);
6401   ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr);
6402   for (k=0; k<nsend; k++) {
6403     iremote[k].rank  = sendto[k];
6404     iremote[k].index = 0;
6405     nleaves2        += nentries[k];
6406     if (PetscUnlikely(nleaves2 < 0)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6407   }
6408   ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6409   ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr);
6410   ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* If nroots2 overflowed, we would catch it via the offsets[] check below */
6411   ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);
6412   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT,nleaves2,n1-rem);
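  /* Illustration of the fetch-and-op above (hypothetical counts): if this rank sends 3 entries to owner p and 5 entries to
     owner q, then nentries = {3,5}. Each leaf atomically adds its count to the owner's root counter and gets back the
     counter's previous value, so offsets[k] is the starting slot of this rank's block in owner sendto[k]'s receive space,
     and each owner's nroots2 ends up being the total number of entries it will receive. */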
6413 
6414   /* Build 2nd SF to send remote COOs to their owner */
6415   PetscSF sf2;
6416   nroots  = nroots2;
6417   nleaves = nleaves2;
6418   ierr    = PetscSFCreate(comm,&sf2);CHKERRQ(ierr);
6419   ierr    = PetscSFSetFromOptions(sf2);CHKERRQ(ierr);
6420   ierr    = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
6421   p       = 0;
6422   for (k=0; k<nsend; k++) {
6423     if (PetscUnlikely(offsets[k] < 0)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6424     for (q=0; q<nentries[k]; q++,p++) {
6425       iremote[p].rank  = sendto[k];
6426       iremote[p].index = offsets[k] + q;
6427     }
6428   }
6429   ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6430 
6431   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6432   ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr);
6433 
6434   /* Send the remote COOs to their owner */
6435   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6436   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6437   ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr);
6438   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr);
6439   ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr);
6440   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr);
6441   ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr);
6442 
6443   ierr = PetscFree(offsets);CHKERRQ(ierr);
6444   ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6445 
6446   /* ---------------------------------------------------------------*/
6447   /* Sort received COOs by row along with the permutation array     */
6448   /* ---------------------------------------------------------------*/
6449   for (k=0; k<n2; k++) perm2[k] = k;
6450   ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr);
6451 
6452   /* ---------------------------------------------------------------*/
6453   /* Split received COOs into diag/offdiag portions                 */
6454   /* ---------------------------------------------------------------*/
6455   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6456   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6457   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6458 
6459   ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr);
6460   ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr);
6461 
6462   /* --------------------------------------------------------------------------*/
6463   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6464   /* --------------------------------------------------------------------------*/
6465   PetscInt   *Ai,*Bi;
6466   PetscInt   *Aj,*Bj;
6467 
6468   ierr  = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr);
6469   ierr  = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr);
6470   ierr  = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */
6471   ierr  = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr);
6472 
6473   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6474   ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr);
6475 
6476   ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr);
6477   ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr);
6478   ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr);
6479   ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr);
6480   ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr);
6481   ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr);
6482 
6483   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6484   PetscInt Annz = Ai[m];
6485   PetscInt Bnnz = Bi[m];
6486   if (Annz < Annz1 + Annz2) {
6487     PetscInt *Aj_new;
6488     ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr);
6489     ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr);
6490     ierr = PetscFree(Aj);CHKERRQ(ierr);
6491     Aj   = Aj_new;
6492   }
6493 
6494   if (Bnnz < Bnnz1 + Bnnz2) {
6495     PetscInt *Bj_new;
6496     ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr);
6497     ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr);
6498     ierr = PetscFree(Bj);CHKERRQ(ierr);
6499     Bj   = Bj_new;
6500   }
6501 
6502   /* --------------------------------------------------------------------------------*/
6503   /* Create an MPIAIJ newmat with CSRs of A and B, then replace mat with newmat         */
6504   /* --------------------------------------------------------------------------------*/
6505   Mat           newmat;
6506   PetscScalar   *Aa,*Ba;
6507   Mat_SeqAIJ    *a,*b;
6508 
6509   ierr   = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Zeroed matrix values */
6510   ierr   = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr);
6511   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6512   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6513   ierr   = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,Ai,Aj,Aa,Bi,Bj,Ba,&newmat);CHKERRQ(ierr); /* FIXME: Can we do it without creating a new mat? */
6514   ierr   = MatHeaderMerge(mat,&newmat);CHKERRQ(ierr); /* Unlike MatHeaderReplace(), some info, e.g. mat->product, is kept */
6515   mpiaij = (Mat_MPIAIJ*)mat->data;
6516   a      = (Mat_SeqAIJ*)mpiaij->A->data;
6517   b      = (Mat_SeqAIJ*)mpiaij->B->data;
6518   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6519   a->free_a       = b->free_a       = PETSC_TRUE;
6520   a->free_ij      = b->free_ij      = PETSC_TRUE;
6521 
6522   mpiaij->coo_n   = coo_n;
6523   mpiaij->coo_sf  = sf2;
6524   mpiaij->sendlen = nleaves;
6525   mpiaij->recvlen = nroots;
6526 
6527   mpiaij->Annz1   = Annz1;
6528   mpiaij->Annz2   = Annz2;
6529   mpiaij->Bnnz1   = Bnnz1;
6530   mpiaij->Bnnz2   = Bnnz2;
6531 
6532   mpiaij->Atot1   = Atot1;
6533   mpiaij->Atot2   = Atot2;
6534   mpiaij->Btot1   = Btot1;
6535   mpiaij->Btot2   = Btot2;
6536 
6537   mpiaij->Aimap1  = Aimap1;
6538   mpiaij->Aimap2  = Aimap2;
6539   mpiaij->Bimap1  = Bimap1;
6540   mpiaij->Bimap2  = Bimap2;
6541 
6542   mpiaij->Ajmap1  = Ajmap1;
6543   mpiaij->Ajmap2  = Ajmap2;
6544   mpiaij->Bjmap1  = Bjmap1;
6545   mpiaij->Bjmap2  = Bjmap2;
6546 
6547   mpiaij->Aperm1  = Aperm1;
6548   mpiaij->Aperm2  = Aperm2;
6549   mpiaij->Bperm1  = Bperm1;
6550   mpiaij->Bperm2  = Bperm2;
6551 
6552   mpiaij->Cperm1  = Cperm1;
6553 
6554   /* Allocate send/recv buffers during preallocation; if not used, they have essentially zero cost on host */
6555   ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr);
6556   PetscFunctionReturn(0);
6557 }
6558 
6559 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6560 {
6561   PetscErrorCode       ierr;
6562   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6563   Mat                  A = mpiaij->A,B = mpiaij->B;
6564   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6565   PetscScalar          *Aa,*Ba;
6566   PetscScalar          *sendbuf = mpiaij->sendbuf;
6567   PetscScalar          *recvbuf = mpiaij->recvbuf;
6568   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6569   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6570   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6571   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6572 
6573   PetscFunctionBegin;
6574   ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */
6575   ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr);
6576   if (imode == INSERT_VALUES) {
6577     ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6578     ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6579   }
6580 
6581   /* Pack entries to be sent to remote */
6582   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6583 
6584   /* Send remote entries to their owner and overlap the communication with local computation */
6585   ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
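  /* Sketch of the accumulation below (hypothetical indices): if the i-th unique local diag nonzero has Ajmap1[i] = 4 and
     Ajmap1[i+1] = 6, then its two repeated input entries are v[Aperm1[4]] and v[Aperm1[5]], and both are added into
     Aa[Aimap1[i]], i.e. that nonzero's slot in A's CSR value array. The same pattern applies to B and to the remote loops. */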
6586   /* Add local entries to A and B */
6587   for (PetscCount i=0; i<Annz1; i++) {
6588     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6589   }
6590   for (PetscCount i=0; i<Bnnz1; i++) {
6591     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6592   }
6593   ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6594 
6595   /* Add received remote entries to A and B */
6596   for (PetscCount i=0; i<Annz2; i++) {
6597     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6598   }
6599   for (PetscCount i=0; i<Bnnz2; i++) {
6600     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6601   }
6602   ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr);
6603   ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr);
6604   PetscFunctionReturn(0);
6605 }
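/* A minimal user-level sketch of the COO assembly path implemented by the two routines above (illustrative only;
   the sizes and indices are made up and error checking with CHKERRQ() is omitted):

     Mat         A;
     PetscInt    i[] = {0, 0, 3};          // row indices; may contain repeats and off-process rows
     PetscInt    j[] = {0, 2, 1};          // column indices
     PetscScalar v[] = {1.0, 2.0, 3.0};    // values matching the (i,j) pairs

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,4,4);
     MatSetType(A,MATMPIAIJ);
     MatSetPreallocationCOO(A,3,i,j);      // dispatches to MatSetPreallocationCOO_MPIAIJ(), done once
     MatSetValuesCOO(A,v,ADD_VALUES);      // dispatches to MatSetValuesCOO_MPIAIJ(); may be repeated with new v[]
*/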
6606 
6607 /* ----------------------------------------------------------------*/
6608 
6609 /*MC
6610    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6611 
6612    Options Database Keys:
6613 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6614 
6615    Level: beginner
6616 
6617    Notes:
6618     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6619     in this case the values associated with the rows and columns one passes in are set to zero
6620     in the matrix
6621 
6622     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6623     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6624 
6625 .seealso: MatCreateAIJ()
6626 M*/
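/* A typical creation sequence for this type (sketch; the preallocation numbers below are placeholders):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);                     // or MatSetFromOptions(A) combined with -mat_type mpiaij
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  // ~5 nonzeros/row in the diag block, ~2 in the offdiag block
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/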
6627 
6628 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6629 {
6630   Mat_MPIAIJ     *b;
6631   PetscErrorCode ierr;
6632   PetscMPIInt    size;
6633 
6634   PetscFunctionBegin;
6635   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6636 
6637   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6638   B->data       = (void*)b;
6639   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6640   B->assembled  = PETSC_FALSE;
6641   B->insertmode = NOT_SET_VALUES;
6642   b->size       = size;
6643 
6644   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6645 
6646   /* build cache for off array entries formed */
6647   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6648 
6649   b->donotstash  = PETSC_FALSE;
6650   b->colmap      = NULL;
6651   b->garray      = NULL;
6652   b->roworiented = PETSC_TRUE;
6653 
6654   /* stuff used for matrix vector multiply */
6655   b->lvec  = NULL;
6656   b->Mvctx = NULL;
6657 
6658   /* stuff for MatGetRow() */
6659   b->rowindices   = NULL;
6660   b->rowvalues    = NULL;
6661   b->getrowactive = PETSC_FALSE;
6662 
6663   /* flexible pointer used in CUSPARSE classes */
6664   b->spptr = NULL;
6665 
6666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6669   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6676 #if defined(PETSC_HAVE_CUDA)
6677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6678 #endif
6679 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6681 #endif
6682 #if defined(PETSC_HAVE_MKL_SPARSE)
6683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6684 #endif
6685   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6687   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6689 #if defined(PETSC_HAVE_ELEMENTAL)
6690   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6691 #endif
6692 #if defined(PETSC_HAVE_SCALAPACK)
6693   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6694 #endif
6695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6696   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6697 #if defined(PETSC_HAVE_HYPRE)
6698   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6699   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6700 #endif
6701   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6702   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6703   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr);
6704   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr);
6705   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6706   PetscFunctionReturn(0);
6707 }
6708 
6709 /*@C
6710      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6711          and "off-diagonal" part of the matrix in CSR format.
6712 
6713    Collective
6714 
6715    Input Parameters:
6716 +  comm - MPI communicator
6717 .  m - number of local rows (Cannot be PETSC_DECIDE)
6718 .  n - This value should be the same as the local size used in creating the
6719        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6720        calculated if N is given). For square matrices n is almost always m.
6721 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6722 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6723 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6724 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6725 .   a - matrix values
6726 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6727 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6728 -   oa - matrix values
6729 
6730    Output Parameter:
6731 .   mat - the matrix
6732 
6733    Level: advanced
6734 
6735    Notes:
6736        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6737        must free the arrays once the matrix has been destroyed and not before.
6738 
6739        The i and j indices are 0 based
6740 
6741        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6742 
6743        This sets local rows and cannot be used to set off-processor values.
6744 
6745        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6746        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6747        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6748        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6749        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6750        communication if it is known that only local entries will be set.
6751 
6752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6753           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6754 @*/
6755 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6756 {
6757   PetscErrorCode ierr;
6758   Mat_MPIAIJ     *maij;
6759 
6760   PetscFunctionBegin;
6761   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6762   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6763   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6764   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6765   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6766   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6767   maij = (Mat_MPIAIJ*) (*mat)->data;
6768 
6769   (*mat)->preallocated = PETSC_TRUE;
6770 
6771   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6772   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6773 
6774   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6775   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6776 
6777   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6778   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6779   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6780   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6781   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6782   PetscFunctionReturn(0);
6783 }
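/* Sketch of calling the routine above (illustrative arrays for rank 0 of two ranks, each owning 2 rows and 2 columns
   of a 4x4 matrix; see MatCreateAIJ() for the diag/offdiag definition):

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};   // diag block CSR, local column ids: entries (0,0) and (1,1)
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {3};     // offdiag block CSR, global column ids: entry (0,3)
     PetscScalar oa[] = {5.0};
     Mat         A;

     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
     // i,j,a,oi,oj,oa are not copied and must stay valid until A is destroyed
*/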
6784 
6785 typedef struct {
6786   Mat       *mp;    /* intermediate products */
6787   PetscBool *mptmp; /* is the intermediate product temporary ? */
6788   PetscInt  cp;     /* number of intermediate products */
6789 
6790   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6791   PetscInt    *startsj_s,*startsj_r;
6792   PetscScalar *bufa;
6793   Mat         P_oth;
6794 
6795   /* may take advantage of merging product->B */
6796   Mat Bloc; /* B-local by merging diag and off-diag */
6797 
6798   /* cusparse does not support splitting the symbolic and numeric phases.
6799      When api_user is true, we don't need to update the numerical values
6800      of the temporary storage */
6801   PetscBool reusesym;
6802 
6803   /* support for COO values insertion */
6804   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6805   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6806   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6807   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6808   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6809   PetscMemType mtype;
6810 
6811   /* customization */
6812   PetscBool abmerge;
6813   PetscBool P_oth_bind;
6814 } MatMatMPIAIJBACKEND;
6815 
6816 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6817 {
6818   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6819   PetscInt            i;
6820   PetscErrorCode      ierr;
6821 
6822   PetscFunctionBegin;
6823   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6824   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6825   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6826   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6827   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6828   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6829   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6830   for (i = 0; i < mmdata->cp; i++) {
6831     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6832   }
6833   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6834   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6835   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6836   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6837   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6838   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6839   PetscFunctionReturn(0);
6840 }
6841 
6842 /* Copy selected n entries with indices in idx[] of A to v[].
6843    If idx is NULL, copy the whole data array of A to v[]
6844  */
6845 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6846 {
6847   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6848   PetscErrorCode ierr;
6849 
6850   PetscFunctionBegin;
6851   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6852   if (f) {
6853     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6854   } else {
6855     const PetscScalar *vv;
6856 
6857     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6858     if (n && idx) {
6859       PetscScalar    *w = v;
6860       const PetscInt *oi = idx;
6861       PetscInt       j;
6862 
6863       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6864     } else {
6865       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6866     }
6867     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6868   }
6869   PetscFunctionReturn(0);
6870 }
6871 
6872 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6873 {
6874   MatMatMPIAIJBACKEND *mmdata;
6875   PetscInt            i,n_d,n_o;
6876   PetscErrorCode      ierr;
6877 
6878   PetscFunctionBegin;
6879   MatCheckProduct(C,1);
6880   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6881   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6882   if (!mmdata->reusesym) { /* update temporary matrices */
6883     if (mmdata->P_oth) {
6884       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6885     }
6886     if (mmdata->Bloc) {
6887       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6888     }
6889   }
6890   mmdata->reusesym = PETSC_FALSE;
6891 
6892   for (i = 0; i < mmdata->cp; i++) {
6893     PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6894     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6895   }
6896   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6897     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6898 
6899     if (mmdata->mptmp[i]) continue;
6900     if (noff) {
6901       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6902 
6903       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6904       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6905       n_o += noff;
6906       n_d += nown;
6907     } else {
6908       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6909 
6910       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6911       n_d += mm->nz;
6912     }
6913   }
6914   if (mmdata->hasoffproc) { /* offprocess insertion */
6915     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6916     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6917   }
6918   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6919   PetscFunctionReturn(0);
6920 }
6921 
6922 /* Support for Pt * A, A * P, or Pt * A * P */
6923 #define MAX_NUMBER_INTERMEDIATE 4
6924 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6925 {
6926   Mat_Product            *product = C->product;
6927   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6928   Mat_MPIAIJ             *a,*p;
6929   MatMatMPIAIJBACKEND    *mmdata;
6930   ISLocalToGlobalMapping P_oth_l2g = NULL;
6931   IS                     glob = NULL;
6932   const char             *prefix;
6933   char                   pprefix[256];
6934   const PetscInt         *globidx,*P_oth_idx;
6935   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6936   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6937   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6938                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6939                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6940   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
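  /* Example of the map types (hypothetical): under a type-1 row map, local row r of mp[i] corresponds to global row
     base+r of C (base being, e.g., C's first owned row); under a type-2 row map it corresponds to rmapa[i][r], e.g.
     rmapa[i] = {7,42,13} for a product with 3 local rows. Column maps behave the same way via cmapt[]/cmapa[]. */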
6941 
6942   MatProductType         ptype;
6943   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6944   PetscMPIInt            size;
6945   PetscErrorCode         ierr;
6946 
6947   PetscFunctionBegin;
6948   MatCheckProduct(C,1);
6949   PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6950   ptype = product->type;
6951   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6952     ptype = MATPRODUCT_AB;
6953     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6954   }
6955   switch (ptype) {
6956   case MATPRODUCT_AB:
6957     A = product->A;
6958     P = product->B;
6959     m = A->rmap->n;
6960     n = P->cmap->n;
6961     M = A->rmap->N;
6962     N = P->cmap->N;
6963     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6964     break;
6965   case MATPRODUCT_AtB:
6966     P = product->A;
6967     A = product->B;
6968     m = P->cmap->n;
6969     n = A->cmap->n;
6970     M = P->cmap->N;
6971     N = A->cmap->N;
6972     hasoffproc = PETSC_TRUE;
6973     break;
6974   case MATPRODUCT_PtAP:
6975     A = product->A;
6976     P = product->B;
6977     m = P->cmap->n;
6978     n = P->cmap->n;
6979     M = P->cmap->N;
6980     N = P->cmap->N;
6981     hasoffproc = PETSC_TRUE;
6982     break;
6983   default:
6984     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6985   }
6986   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6987   if (size == 1) hasoffproc = PETSC_FALSE;
6988 
6989   /* defaults */
6990   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6991     mp[i]    = NULL;
6992     mptmp[i] = PETSC_FALSE;
6993     rmapt[i] = -1;
6994     cmapt[i] = -1;
6995     rmapa[i] = NULL;
6996     cmapa[i] = NULL;
6997   }
6998 
6999   /* customization */
7000   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
7001   mmdata->reusesym = product->api_user;
7002   if (ptype == MATPRODUCT_AB) {
7003     if (product->api_user) {
7004       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7005       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
7006       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7007       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7008     } else {
7009       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7010       ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
7011       ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7012       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7013     }
7014   } else if (ptype == MATPRODUCT_PtAP) {
7015     if (product->api_user) {
7016       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7017       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7018       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7019     } else {
7020       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7021       ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7022       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7023     }
7024   }
7025   a = (Mat_MPIAIJ*)A->data;
7026   p = (Mat_MPIAIJ*)P->data;
7027   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
7028   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
7029   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
7030   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
7031   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
7032 
7033   cp   = 0;
7034   switch (ptype) {
7035   case MATPRODUCT_AB: /* A * P */
7036     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
7037 
7038     /* A_diag * P_local (merged or not) */
7039     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7040       /* P is product->B */
7041       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7042       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7043       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7044       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7045       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7046       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7047       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7048       mp[cp]->product->api_user = product->api_user;
7049       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7050       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7051       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7052       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7053       rmapt[cp] = 1;
7054       cmapt[cp] = 2;
7055       cmapa[cp] = globidx;
7056       mptmp[cp] = PETSC_FALSE;
7057       cp++;
7058     } else { /* A_diag * P_diag and A_diag * P_off */
7059       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
7060       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7061       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7062       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7063       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7064       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7065       mp[cp]->product->api_user = product->api_user;
7066       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7067       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7068       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7069       rmapt[cp] = 1;
7070       cmapt[cp] = 1;
7071       mptmp[cp] = PETSC_FALSE;
7072       cp++;
7073       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
7074       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7075       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7076       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7077       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7078       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7079       mp[cp]->product->api_user = product->api_user;
7080       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7081       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7082       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7083       rmapt[cp] = 1;
7084       cmapt[cp] = 2;
7085       cmapa[cp] = p->garray;
7086       mptmp[cp] = PETSC_FALSE;
7087       cp++;
7088     }
7089 
7090     /* A_off * P_other */
7091     if (mmdata->P_oth) {
7092       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
7093       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7094       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
7095       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
7096       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
7097       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7098       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7099       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7100       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7101       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7102       mp[cp]->product->api_user = product->api_user;
7103       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7104       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7105       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7106       rmapt[cp] = 1;
7107       cmapt[cp] = 2;
7108       cmapa[cp] = P_oth_idx;
7109       mptmp[cp] = PETSC_FALSE;
7110       cp++;
7111     }
7112     break;
7113 
7114   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
7115     /* A is product->B */
7116     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7117     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7118       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7119       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7120       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7121       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7122       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7123       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7124       mp[cp]->product->api_user = product->api_user;
7125       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7126       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7127       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7128       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7129       rmapt[cp] = 2;
7130       rmapa[cp] = globidx;
7131       cmapt[cp] = 2;
7132       cmapa[cp] = globidx;
7133       mptmp[cp] = PETSC_FALSE;
7134       cp++;
7135     } else {
7136       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7137       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7138       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7139       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7140       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7141       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7142       mp[cp]->product->api_user = product->api_user;
7143       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7144       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7145       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7146       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7147       rmapt[cp] = 1;
7148       cmapt[cp] = 2;
7149       cmapa[cp] = globidx;
7150       mptmp[cp] = PETSC_FALSE;
7151       cp++;
7152       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7153       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7154       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7155       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7156       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7157       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7158       mp[cp]->product->api_user = product->api_user;
7159       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7160       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7161       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7162       rmapt[cp] = 2;
7163       rmapa[cp] = p->garray;
7164       cmapt[cp] = 2;
7165       cmapa[cp] = globidx;
7166       mptmp[cp] = PETSC_FALSE;
7167       cp++;
7168     }
7169     break;
7170   case MATPRODUCT_PtAP:
7171     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
7172     /* P is product->B */
7173     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7174     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7175     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
7176     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7177     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7178     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7179     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7180     mp[cp]->product->api_user = product->api_user;
7181     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7182     PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7183     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7184     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7185     rmapt[cp] = 2;
7186     rmapa[cp] = globidx;
7187     cmapt[cp] = 2;
7188     cmapa[cp] = globidx;
7189     mptmp[cp] = PETSC_FALSE;
7190     cp++;
7191     if (mmdata->P_oth) {
7192       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
7193       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7194       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
7195       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
7196       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
7197       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7198       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7199       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7200       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7201       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7202       mp[cp]->product->api_user = product->api_user;
7203       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7204       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7205       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7206       mptmp[cp] = PETSC_TRUE;
7207       cp++;
7208       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
7209       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7210       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7211       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7212       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7213       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7214       mp[cp]->product->api_user = product->api_user;
7215       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7216       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7217       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7218       rmapt[cp] = 2;
7219       rmapa[cp] = globidx;
7220       cmapt[cp] = 2;
7221       cmapa[cp] = P_oth_idx;
7222       mptmp[cp] = PETSC_FALSE;
7223       cp++;
7224     }
7225     break;
7226   default:
7227     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7228   }
7229   /* sanity check: a sparse (type-2) row map should only occur when off-process rows were detected (hasoffproc) */
7230   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7231 
7232   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
7233   for (i = 0; i < cp; i++) {
7234     mmdata->mp[i]    = mp[i];
7235     mmdata->mptmp[i] = mptmp[i];
7236   }
7237   mmdata->cp = cp;
7238   C->product->data       = mmdata;
7239   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7240   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
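  /* Note: MatProductNumeric_MPIAIJBACKEND is expected to rerun the numeric phase of each stored
     sub-product mp[] and scatter the resulting values into C through the COO support (the off/own
     maps, the SF, and the coo_w/coo_v buffers) set up in the rest of this routine. */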
7241 
7242   /* memory type */
7243   mmdata->mtype = PETSC_MEMTYPE_HOST;
7244   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
7245   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
7246   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7247 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
7248   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
7249 #endif
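  /* The memory type chosen here controls where the COO value buffers below (coo_w, coo_v) are
     allocated by PetscSFMalloc(), so device matrix types (CUDA/Kokkos) get device-resident buffers. */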
7250 
7251   /* prepare coo coordinates for values insertion */
7252 
7253   /* count the total nonzeros of the intermediate SeqAIJ matrices
7254     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7255     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
7256     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7257   */
7258   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7259     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7260     if (mptmp[cp]) continue;
7261     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (which might include self) */
7262       const PetscInt *rmap = rmapa[cp];
7263       const PetscInt mr = mp[cp]->rmap->n;
7264       const PetscInt rs = C->rmap->rstart;
7265       const PetscInt re = C->rmap->rend;
7266       const PetscInt *ii  = mm->i;
7267       for (i = 0; i < mr; i++) {
7268         const PetscInt gr = rmap[i];
7269         const PetscInt nz = ii[i+1] - ii[i];
7270         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7271         else ncoo_oown += nz; /* this row is local */
7272       }
7273     } else ncoo_d += mm->nz;
7274   }
7275 
7276   /*
7277     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7278 
7279     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7280 
7281     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7282 
7283     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
7284     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7285     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7286 
7287     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7288     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7289   */
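  /* A small illustration with hypothetical sizes: if ncoo_d = 4, ncoo_oown = 2 and ncoo2 = 3, then
     ncoo = 9 and coo_i[]/coo_j[] are laid out as
       [ 6 entries inserted locally | 3 entries received from remote procs ],
     the received tail being filled by the PetscSFGather calls below. */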
7290   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
7291   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
7292 
7293   /* record the (i,j) this proc will send to others and gather the (i,j) of nonzeros that remote procs will insert into this proc */
7294   if (hasoffproc) {
7295     PetscSF  msf;
7296     PetscInt ncoo2,*coo_i2,*coo_j2;
7297 
7298     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
7299     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
7300     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
7301 
7302     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7303       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7304       PetscInt   *idxoff = mmdata->off[cp];
7305       PetscInt   *idxown = mmdata->own[cp];
7306       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7307         const PetscInt *rmap = rmapa[cp];
7308         const PetscInt *cmap = cmapa[cp];
7309         const PetscInt *ii  = mm->i;
7310         PetscInt       *coi = coo_i + ncoo_o;
7311         PetscInt       *coj = coo_j + ncoo_o;
7312         const PetscInt mr = mp[cp]->rmap->n;
7313         const PetscInt rs = C->rmap->rstart;
7314         const PetscInt re = C->rmap->rend;
7315         const PetscInt cs = C->cmap->rstart;
7316         for (i = 0; i < mr; i++) {
7317           const PetscInt *jj = mm->j + ii[i];
7318           const PetscInt gr  = rmap[i];
7319           const PetscInt nz  = ii[i+1] - ii[i];
7320           if (gr < rs || gr >= re) { /* this is an offproc row */
7321             for (j = ii[i]; j < ii[i+1]; j++) {
7322               *coi++ = gr;
7323               *idxoff++ = j;
7324             }
7325             if (!cmapt[cp]) { /* already global */
7326               for (j = 0; j < nz; j++) *coj++ = jj[j];
7327             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7328               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7329             } else { /* offdiag */
7330               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7331             }
7332             ncoo_o += nz;
7333           } else { /* this is a local row */
7334             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7335           }
7336         }
7337       }
7338       mmdata->off[cp + 1] = idxoff;
7339       mmdata->own[cp + 1] = idxown;
7340     }
7341 
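    /* Build an SF whose leaves are the ncoo_o entries computed here for rows owned by other ranks:
       PetscSFSetGraphLayout() maps each leaf to the rank owning its global row according to C->rmap,
       and the root count of the multi-SF (ncoo2) is the number of entries this rank will receive.
       The gathers below then append the (i,j) of the received entries after the local ones. */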
7342     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
7343     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
7344     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
7345     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
7346     ncoo = ncoo_d + ncoo_oown + ncoo2;
7347     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
7348     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
7349     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7350     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7351     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7352     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
7353     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7354     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
7355     coo_i = coo_i2;
7356     coo_j = coo_j2;
7357   } else { /* no offproc values insertion */
7358     ncoo = ncoo_d;
7359     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
7360 
7361     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
7362     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
7363     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
7364   }
7365   mmdata->hasoffproc = hasoffproc;
7366 
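  /* Map types used below: rmapt[]/cmapt[] == 1 means the indices of mp[cp] are contiguous in C and only
     need the rstart/cstart offset; == 2 means they are translated through the rmapa[]/cmapa[] arrays;
     cmapt[] == 0 means the column indices are already global. */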
7367    /* gather (i,j) of nonzeros inserted locally */
7368   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7369     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7370     PetscInt       *coi = coo_i + ncoo_d;
7371     PetscInt       *coj = coo_j + ncoo_d;
7372     const PetscInt *jj  = mm->j;
7373     const PetscInt *ii  = mm->i;
7374     const PetscInt *cmap = cmapa[cp];
7375     const PetscInt *rmap = rmapa[cp];
7376     const PetscInt mr = mp[cp]->rmap->n;
7377     const PetscInt rs = C->rmap->rstart;
7378     const PetscInt re = C->rmap->rend;
7379     const PetscInt cs = C->cmap->rstart;
7380 
7381     if (mptmp[cp]) continue;
7382     if (rmapt[cp] == 1) { /* consecutive rows */
7383       /* fill coo_i */
7384       for (i = 0; i < mr; i++) {
7385         const PetscInt gr = i + rs;
7386         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7387       }
7388       /* fill coo_j */
7389       if (!cmapt[cp]) { /* type-0, already global */
7390         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
7391       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7392         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7393       } else { /* type-2, local to global for sparse columns */
7394         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7395       }
7396       ncoo_d += mm->nz;
7397     } else if (rmapt[cp] == 2) { /* sparse rows */
7398       for (i = 0; i < mr; i++) {
7399         const PetscInt *jj = mm->j + ii[i];
7400         const PetscInt gr  = rmap[i];
7401         const PetscInt nz  = ii[i+1] - ii[i];
7402         if (gr >= rs && gr < re) { /* local rows */
7403           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7404           if (!cmapt[cp]) { /* type-0, already global */
7405             for (j = 0; j < nz; j++) *coj++ = jj[j];
7406           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7407             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7408           } else { /* type-2, local to global for sparse columns */
7409             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7410           }
7411           ncoo_d += nz;
7412         }
7413       }
7414     }
7415   }
7416   if (glob) {
7417     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
7418   }
7419   ierr = ISDestroy(&glob);CHKERRQ(ierr);
7420   if (P_oth_l2g) {
7421     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7422   }
7423   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
7424   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7425   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
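  /* coo_v is not filled here; it is the buffer the numeric phase is meant to populate with the values
     of the sub-products before they are assembled into C. */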
7426 
7427   /* preallocate with COO data */
7428   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
7429   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
7430   PetscFunctionReturn(0);
7431 }
7432 
7433 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7434 {
7435   Mat_Product    *product = mat->product;
7436   PetscErrorCode ierr;
7437 #if defined(PETSC_HAVE_DEVICE)
7438   PetscBool      match = PETSC_FALSE;
7439   PetscBool      usecpu = PETSC_FALSE;
7440 #else
7441   PetscBool      match = PETSC_TRUE;
7442 #endif
7443 
7444   PetscFunctionBegin;
7445   MatCheckProduct(mat,1);
7446 #if defined(PETSC_HAVE_DEVICE)
7447   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7448     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7449   }
7450   if (match) { /* we can always fall back to the CPU if requested */
7451     switch (product->type) {
7452     case MATPRODUCT_AB:
7453       if (product->api_user) {
7454         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7455         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7456         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7457       } else {
7458         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7459         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7460         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7461       }
7462       break;
7463     case MATPRODUCT_AtB:
7464       if (product->api_user) {
7465         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7466         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7467         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7468       } else {
7469         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7470         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7471         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7472       }
7473       break;
7474     case MATPRODUCT_PtAP:
7475       if (product->api_user) {
7476         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7477         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7478         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7479       } else {
7480         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7481         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7482         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7483       }
7484       break;
7485     default:
7486       break;
7487     }
7488     match = (PetscBool)!usecpu;
7489   }
7490 #endif
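  /* Example: when PETSc is configured with device support, running with -matptap_backend_cpu (or
     -mat_product_algorithm_backend_cpu with the MatProduct API) sets usecpu, clears match, and thus
     skips the backend symbolic routine so that the plain MPIAIJ implementation below is used. */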
7491   if (match) {
7492     switch (product->type) {
7493     case MATPRODUCT_AB:
7494     case MATPRODUCT_AtB:
7495     case MATPRODUCT_PtAP:
7496       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7497       break;
7498     default:
7499       break;
7500     }
7501   }
7502   /* fall back to the standard MPIAIJ ops */
7503   if (!mat->ops->productsymbolic) {
7504     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7505   }
7506   PetscFunctionReturn(0);
7507 }
7508 
7509 /*
7510     Special version for direct calls from Fortran
7511 */
7512 #include <petsc/private/fortranimpl.h>
7513 
7514 /* Change these macros so they can be used in a void function */
7515 /* Identical to CHKERRV, except it assigns the error code to *_ierr */
7516 #undef CHKERRQ
7517 #define CHKERRQ(ierr) do {                                                                     \
7518     PetscErrorCode ierr_msv_mpiaij = (ierr);                                                   \
7519     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7520       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7521       return;                                                                                  \
7522     }                                                                                          \
7523   } while (0)
7524 
7525 #undef SETERRQ
7526 #define SETERRQ(comm,ierr,...) do {                                                            \
7527     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7528     return;                                                                                    \
7529   } while (0)
7530 
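/* Map the C symbol to the Fortran compiler's name-mangling scheme: all caps, plain lowercase without a
   trailing underscore, or (the default branch below) a single trailing underscore. */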
7531 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7532 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7533 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7534 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7535 #else
7536 #endif
7537 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7538 {
7539   Mat            mat  = *mmat;
7540   PetscInt       m    = *mm, n = *mn;
7541   InsertMode     addv = *maddv;
7542   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
7543   PetscScalar    value;
7544   PetscErrorCode ierr;
7545 
7546   MatCheckPreallocated(mat,1);
7547   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7548   else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7549   {
7550     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7551     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7552     PetscBool roworiented = aij->roworiented;
7553 
7554     /* Some variables required in the MatSetValues_SeqAIJ_{A,B}_Private() macros */
7555     Mat        A                    = aij->A;
7556     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7557     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7558     MatScalar  *aa;
7559     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7560     Mat        B                    = aij->B;
7561     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7562     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7563     MatScalar  *ba;
7564     /* The variable below is needed only in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7565      * cannot use "#if defined" inside a macro. */
7566     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7567 
7568     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7569     PetscInt  nonew = a->nonew;
7570     MatScalar *ap1,*ap2;
7571 
7572     PetscFunctionBegin;
7573     ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
7574     ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
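    /* For each (im[i],in[j]) pair: rows owned by this rank are inserted directly, into the diagonal
       block A when the column is locally owned and into the off-diagonal block B otherwise (translating
       the column through colmap, and disassembling first if a brand new off-diagonal column shows up on
       an already assembled matrix); rows owned by other ranks are stashed for communication at assembly
       time unless donotstash is set. */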
7575     for (i=0; i<m; i++) {
7576       if (im[i] < 0) continue;
7577       PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
7578       if (im[i] >= rstart && im[i] < rend) {
7579         row      = im[i] - rstart;
7580         lastcol1 = -1;
7581         rp1      = aj + ai[row];
7582         ap1      = aa + ai[row];
7583         rmax1    = aimax[row];
7584         nrow1    = ailen[row];
7585         low1     = 0;
7586         high1    = nrow1;
7587         lastcol2 = -1;
7588         rp2      = bj + bi[row];
7589         ap2      = ba + bi[row];
7590         rmax2    = bimax[row];
7591         nrow2    = bilen[row];
7592         low2     = 0;
7593         high2    = nrow2;
7594 
7595         for (j=0; j<n; j++) {
7596           if (roworiented) value = v[i*n+j];
7597           else value = v[i+j*m];
7598           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7599           if (in[j] >= cstart && in[j] < cend) {
7600             col = in[j] - cstart;
7601             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7602           } else if (in[j] < 0) continue;
7603           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7604             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7605             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7606           } else {
7607             if (mat->was_assembled) {
7608               if (!aij->colmap) {
7609                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
7610               }
7611 #if defined(PETSC_USE_CTABLE)
7612               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
7613               col--;
7614 #else
7615               col = aij->colmap[in[j]] - 1;
7616 #endif
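              /* col is now the local column index in the off-diagonal block, or negative if this global
                 column has no slot yet; in that case, if new nonzeros are allowed, the matrix is
                 disassembled so the entry can be inserted with its global column index. */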
7617               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7618                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
7619                 col  =  in[j];
7620                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7621                 B        = aij->B;
7622                 b        = (Mat_SeqAIJ*)B->data;
7623                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7624                 rp2      = bj + bi[row];
7625                 ap2      = ba + bi[row];
7626                 rmax2    = bimax[row];
7627                 nrow2    = bilen[row];
7628                 low2     = 0;
7629                 high2    = nrow2;
7630                 bm       = aij->B->rmap->n;
7631                 ba       = b->a;
7632                 inserted = PETSC_FALSE;
7633               }
7634             } else col = in[j];
7635             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7636           }
7637         }
7638       } else if (!aij->donotstash) {
7639         if (roworiented) {
7640           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7641         } else {
7642           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7643         }
7644       }
7645     }
7646     ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
7647     ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
7648   }
7649   PetscFunctionReturnVoid();
7650 }
7651 /* Undefine these macros here since they were redefined above, replacing their original definitions. No
7652  * other PETSc functions should be defined past this point, as it is impossible to recover the
7653  * original definitions. */
7654 #undef CHKERRQ
7655 #undef SETERRQ
7656