xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically switches over to use inodes when
23    enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
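/*
   Illustrative usage sketch (not taken from this file): create an AIJ matrix and call both
   preallocation routines, as the manual page above recommends.  The global sizes M and N and the
   per-row nonzero counts (5 diagonal-block and 2 off-diagonal-block nonzeros) are placeholder
   values for the example only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                        /* or -mat_type aij via MatSetFromOptions() */
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         /* used on a single-process communicator */
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  /* used on multi-process communicators */
*/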
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
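/*
   Illustrative note (not taken from this file): as with MATAIJ above, the AIJCRL variant is most
   commonly requested at runtime through the options database, e.g.

     ./app -mat_type aijcrl

   after the application calls MatSetFromOptions() on the matrix; the preallocation calls are the
   same as in the MATAIJ sketch above.
*/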
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
90 {
91   PetscErrorCode  ierr;
92   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
93   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
94   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
95   const PetscInt  *ia,*ib;
96   const MatScalar *aa,*bb,*aav,*bav;
97   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
98   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
99 
100   PetscFunctionBegin;
101   *keptrows = NULL;
102 
103   ia   = a->i;
104   ib   = b->i;
105   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
106   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
107   for (i=0; i<m; i++) {
108     na = ia[i+1] - ia[i];
109     nb = ib[i+1] - ib[i];
110     if (!na && !nb) {
111       cnt++;
112       goto ok1;
113     }
114     aa = aav + ia[i];
115     for (j=0; j<na; j++) {
116       if (aa[j] != 0.0) goto ok1;
117     }
118     bb = bav + ib[i];
119     for (j=0; j <nb; j++) {
120       if (bb[j] != 0.0) goto ok1;
121     }
122     cnt++;
123 ok1:;
124   }
125   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
126   if (!n0rows) {
127     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
128     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
129     PetscFunctionReturn(0);
130   }
131   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
132   cnt  = 0;
133   for (i=0; i<m; i++) {
134     na = ia[i+1] - ia[i];
135     nb = ib[i+1] - ib[i];
136     if (!na && !nb) continue;
137     aa = aav + ia[i];
138     for (j=0; j<na;j++) {
139       if (aa[j] != 0.0) {
140         rows[cnt++] = rstart + i;
141         goto ok2;
142       }
143     }
144     bb = bav + ib[i];
145     for (j=0; j<nb; j++) {
146       if (bb[j] != 0.0) {
147         rows[cnt++] = rstart + i;
148         goto ok2;
149       }
150     }
151 ok2:;
152   }
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
154   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
155   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
191 {
192   PetscErrorCode    ierr;
193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
194   PetscInt          i,m,n,*garray = aij->garray;
195   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
196   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
197   PetscReal         *work;
198   const PetscScalar *dummy;
199 
200   PetscFunctionBegin;
201   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
202   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
203   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
204   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
205   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
206   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
207   if (type == NORM_2) {
208     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
209       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
210     }
211     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
212       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
213     }
214   } else if (type == NORM_1) {
215     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
216       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
217     }
218     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
219       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
220     }
221   } else if (type == NORM_INFINITY) {
222     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
223       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
224     }
225     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
226       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
227     }
228   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
229     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
230       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
231     }
232     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
233       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
234     }
235   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
236     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
237       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
238     }
239     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
240       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
241     }
242   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
243   if (type == NORM_INFINITY) {
244     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
245   } else {
246     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
247   }
248   ierr = PetscFree(work);CHKERRQ(ierr);
249   if (type == NORM_2) {
250     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
251   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
252     for (i=0; i<n; i++) reductions[i] /= m;
253   }
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 /*
290   Local utility routine that creates a mapping from the global column
291 number to the local number in the off-diagonal part of the local
292 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
293 a slightly higher hash table cost; without it, it is not scalable (each processor
294 has an order-N integer array) but is fast to access.
295 */
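/*
   Sketch of how the colmap built below is consulted elsewhere in this file (see MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ()).  Both branches store the local index shifted by one so that a zero
   lookup result (i.e. -1 after shifting back) marks a global column that is not present; gcol and
   lcol are placeholder names for a global column index and the corresponding local index into the
   off-diagonal block.

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/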
296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
297 {
298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
299   PetscErrorCode ierr;
300   PetscInt       n = aij->B->cmap->n,i;
301 
302   PetscFunctionBegin;
303   if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
304 #if defined(PETSC_USE_CTABLE)
305   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
306   for (i=0; i<n; i++) {
307     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
308   }
309 #else
310   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
311   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
312   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
313 #endif
314   PetscFunctionReturn(0);
315 }
316 
317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
318 { \
319     if (col <= lastcol1)  low1 = 0;     \
320     else                 high1 = nrow1; \
321     lastcol1 = col;\
322     while (high1-low1 > 5) { \
323       t = (low1+high1)/2; \
324       if (rp1[t] > col) high1 = t; \
325       else              low1  = t; \
326     } \
327       for (_i=low1; _i<high1; _i++) { \
328         if (rp1[_i] > col) break; \
329         if (rp1[_i] == col) { \
330           if (addv == ADD_VALUES) { \
331             ap1[_i] += value;   \
332             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
333             (void)PetscLogFlops(1.0);   \
334            } \
335           else                    ap1[_i] = value; \
336           inserted = PETSC_TRUE; \
337           goto a_noinsert; \
338         } \
339       }  \
340       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
341       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
342       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
343       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
344       N = nrow1++ - 1; a->nz++; high1++; \
345       /* shift up all the later entries in this row */ \
346       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
347       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
348       rp1[_i] = col;  \
349       ap1[_i] = value;  \
350       A->nonzerostate++;\
351       a_noinsert: ; \
352       ailen[row] = nrow1; \
353 }
354 
355 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
356   { \
357     if (col <= lastcol2) low2 = 0;                        \
358     else high2 = nrow2;                                   \
359     lastcol2 = col;                                       \
360     while (high2-low2 > 5) {                              \
361       t = (low2+high2)/2;                                 \
362       if (rp2[t] > col) high2 = t;                        \
363       else             low2  = t;                         \
364     }                                                     \
365     for (_i=low2; _i<high2; _i++) {                       \
366       if (rp2[_i] > col) break;                           \
367       if (rp2[_i] == col) {                               \
368         if (addv == ADD_VALUES) {                         \
369           ap2[_i] += value;                               \
370           (void)PetscLogFlops(1.0);                       \
371         }                                                 \
372         else                    ap2[_i] = value;          \
373         inserted = PETSC_TRUE;                            \
374         goto b_noinsert;                                  \
375       }                                                   \
376     }                                                     \
377     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
378     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
379     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
380     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
381     N = nrow2++ - 1; b->nz++; high2++;                    \
382     /* shift up all the later entries in this row */      \
383     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
384     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
385     rp2[_i] = col;                                        \
386     ap2[_i] = value;                                      \
387     B->nonzerostate++;                                    \
388     b_noinsert: ;                                         \
389     bilen[row] = nrow2;                                   \
390   }
391 
392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
393 {
394   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
395   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
396   PetscErrorCode ierr;
397   PetscInt       l,*garray = mat->garray,diag;
398 
399   PetscFunctionBegin;
400   /* code only works for square matrices A */
401 
402   /* find size of row to the left of the diagonal part */
403   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
404   row  = row - diag;
405   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
406     if (garray[b->j[b->i[row]+l]] > diag) break;
407   }
408   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
409 
410   /* diagonal part */
411   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
412 
413   /* right of diagonal part */
414   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
415 #if defined(PETSC_HAVE_DEVICE)
416   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
417 #endif
418   PetscFunctionReturn(0);
419 }
420 
421 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
422 {
423   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
424   PetscScalar    value = 0.0;
425   PetscErrorCode ierr;
426   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
427   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
428   PetscBool      roworiented = aij->roworiented;
429 
430   /* Some Variables required in the macro */
431   Mat        A                    = aij->A;
432   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
433   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
434   PetscBool  ignorezeroentries    = a->ignorezeroentries;
435   Mat        B                    = aij->B;
436   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
437   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
438   MatScalar  *aa,*ba;
439   /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
440    * cannot use "#if defined" inside a macro. */
441   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
442 
443   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
444   PetscInt  nonew;
445   MatScalar *ap1,*ap2;
446 
447   PetscFunctionBegin;
448 #if defined(PETSC_HAVE_DEVICE)
449   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
450     const PetscScalar *dummy;
451     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
452     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
453   }
454   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
455     const PetscScalar *dummy;
456     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
457     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
458   }
459 #endif
460   aa = a->a;
461   ba = b->a;
462   for (i=0; i<m; i++) {
463     if (im[i] < 0) continue;
464     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
465     if (im[i] >= rstart && im[i] < rend) {
466       row      = im[i] - rstart;
467       lastcol1 = -1;
468       rp1      = aj + ai[row];
469       ap1      = aa + ai[row];
470       rmax1    = aimax[row];
471       nrow1    = ailen[row];
472       low1     = 0;
473       high1    = nrow1;
474       lastcol2 = -1;
475       rp2      = bj + bi[row];
476       ap2      = ba + bi[row];
477       rmax2    = bimax[row];
478       nrow2    = bilen[row];
479       low2     = 0;
480       high2    = nrow2;
481 
482       for (j=0; j<n; j++) {
483         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
484         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
485         if (in[j] >= cstart && in[j] < cend) {
486           col   = in[j] - cstart;
487           nonew = a->nonew;
488           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
489 #if defined(PETSC_HAVE_DEVICE)
490           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
491 #endif
492         } else if (in[j] < 0) continue;
493         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
494         else {
495           if (mat->was_assembled) {
496             if (!aij->colmap) {
497               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
498             }
499 #if defined(PETSC_USE_CTABLE)
500             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
501             col--;
502 #else
503             col = aij->colmap[in[j]] - 1;
504 #endif
505             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
506               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
507               col  =  in[j];
508               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
509               B        = aij->B;
510               b        = (Mat_SeqAIJ*)B->data;
511               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
512               rp2      = bj + bi[row];
513               ap2      = ba + bi[row];
514               rmax2    = bimax[row];
515               nrow2    = bilen[row];
516               low2     = 0;
517               high2    = nrow2;
518               bm       = aij->B->rmap->n;
519               ba       = b->a;
520               inserted = PETSC_FALSE;
521             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
522               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
523                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
524               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
525             }
526           } else col = in[j];
527           nonew = b->nonew;
528           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
529 #if defined(PETSC_HAVE_DEVICE)
530           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
531 #endif
532         }
533       }
534     } else {
535       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
536       if (!aij->donotstash) {
537         mat->assembled = PETSC_FALSE;
538         if (roworiented) {
539           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
540         } else {
541           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
542         }
543       }
544     }
545   }
546   PetscFunctionReturn(0);
547 }
548 
549 /*
550     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
551     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
552     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
553 */
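/*
   Worked mini-example (illustrative only): suppose this process owns columns [cstart,cend) = [4,8)
   and a row of the input CSR holds global columns {2,5,7,9}.  Columns 5 and 7 land in the diagonal
   block with local indices 1 and 3 (shifted by cstart), while columns 2 and 9 land in the
   off-diagonal block and keep their global indices until MatSetUpMultiply_MPIAIJ() compacts them
   during assembly.
*/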
554 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
555 {
556   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
557   Mat            A           = aij->A; /* diagonal part of the matrix */
558   Mat            B           = aij->B; /* offdiagonal part of the matrix */
559   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
560   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
561   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
562   PetscInt       *ailen      = a->ilen,*aj = a->j;
563   PetscInt       *bilen      = b->ilen,*bj = b->j;
564   PetscInt       am          = aij->A->rmap->n,j;
565   PetscInt       diag_so_far = 0,dnz;
566   PetscInt       offd_so_far = 0,onz;
567 
568   PetscFunctionBegin;
569   /* Iterate over all rows of the matrix */
570   for (j=0; j<am; j++) {
571     dnz = onz = 0;
572     /*  Iterate over all non-zero columns of the current row */
573     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
574       /* If column is in the diagonal */
575       if (mat_j[col] >= cstart && mat_j[col] < cend) {
576         aj[diag_so_far++] = mat_j[col] - cstart;
577         dnz++;
578       } else { /* off-diagonal entries */
579         bj[offd_so_far++] = mat_j[col];
580         onz++;
581       }
582     }
583     ailen[j] = dnz;
584     bilen[j] = onz;
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 /*
590     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
591     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
592     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
593     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
594     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
595 */
596 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
597 {
598   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
599   Mat            A      = aij->A; /* diagonal part of the matrix */
600   Mat            B      = aij->B; /* offdiagonal part of the matrix */
601   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
602   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
603   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
604   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
605   PetscInt       *ailen = a->ilen,*aj = a->j;
606   PetscInt       *bilen = b->ilen,*bj = b->j;
607   PetscInt       am     = aij->A->rmap->n,j;
608   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
609   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
610   PetscScalar    *aa = a->a,*ba = b->a;
611 
612   PetscFunctionBegin;
613   /* Iterate over all rows of the matrix */
614   for (j=0; j<am; j++) {
615     dnz_row = onz_row = 0;
616     rowstart_offd = full_offd_i[j];
617     rowstart_diag = full_diag_i[j];
618     /*  Iterate over all non-zero columns of the current row */
619     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
620       /* If column is in the diagonal */
621       if (mat_j[col] >= cstart && mat_j[col] < cend) {
622         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
623         aa[rowstart_diag+dnz_row] = mat_a[col];
624         dnz_row++;
625       } else { /* off-diagonal entries */
626         bj[rowstart_offd+onz_row] = mat_j[col];
627         ba[rowstart_offd+onz_row] = mat_a[col];
628         onz_row++;
629       }
630     }
631     ailen[j] = dnz_row;
632     bilen[j] = onz_row;
633   }
634   PetscFunctionReturn(0);
635 }
636 
637 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
638 {
639   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
640   PetscErrorCode ierr;
641   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
642   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
643 
644   PetscFunctionBegin;
645   for (i=0; i<m; i++) {
646     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
647     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
648     if (idxm[i] >= rstart && idxm[i] < rend) {
649       row = idxm[i] - rstart;
650       for (j=0; j<n; j++) {
651         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
652         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
653         if (idxn[j] >= cstart && idxn[j] < cend) {
654           col  = idxn[j] - cstart;
655           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
656         } else {
657           if (!aij->colmap) {
658             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
659           }
660 #if defined(PETSC_USE_CTABLE)
661           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
662           col--;
663 #else
664           col = aij->colmap[idxn[j]] - 1;
665 #endif
666           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
667           else {
668             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
669           }
670         }
671       }
672     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
673   }
674   PetscFunctionReturn(0);
675 }
676 
677 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
678 {
679   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
680   PetscErrorCode ierr;
681   PetscInt       nstash,reallocs;
682 
683   PetscFunctionBegin;
684   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
685 
686   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
687   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
688   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
693 {
694   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
695   PetscErrorCode ierr;
696   PetscMPIInt    n;
697   PetscInt       i,j,rstart,ncols,flg;
698   PetscInt       *row,*col;
699   PetscBool      other_disassembled;
700   PetscScalar    *val;
701 
702   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
703 
704   PetscFunctionBegin;
705   if (!aij->donotstash && !mat->nooffprocentries) {
706     while (1) {
707       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
708       if (!flg) break;
709 
710       for (i=0; i<n;) {
711         /* Now identify the consecutive vals belonging to the same row */
712         for (j=i,rstart=row[j]; j<n; j++) {
713           if (row[j] != rstart) break;
714         }
715         if (j < n) ncols = j-i;
716         else       ncols = n-i;
717         /* Now assemble all these values with a single function call */
718         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
719         i    = j;
720       }
721     }
722     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
723   }
724 #if defined(PETSC_HAVE_DEVICE)
725   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
726   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
727   if (mat->boundtocpu) {
728     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
729     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
730   }
731 #endif
732   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
733   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
734 
735   /* determine if any processor has disassembled; if so, we must
736      also disassemble ourselves, in order that we may reassemble. */
737   /*
738      if the nonzero structure of submatrix B cannot change then we know that
739      no processor disassembled, and thus we can skip this stuff
740   */
741   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
742     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
743     if (mat->was_assembled && !other_disassembled) {
744 #if defined(PETSC_HAVE_DEVICE)
745       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
746 #endif
747       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
748     }
749   }
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
752   }
753   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
754 #if defined(PETSC_HAVE_DEVICE)
755   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
756 #endif
757   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
758   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
759 
760   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
761 
762   aij->rowvalues = NULL;
763 
764   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
765 
766   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
767   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
768     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
769     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
770   }
771 #if defined(PETSC_HAVE_DEVICE)
772   mat->offloadmask = PETSC_OFFLOAD_BOTH;
773 #endif
774   PetscFunctionReturn(0);
775 }
776 
777 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
778 {
779   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
780   PetscErrorCode ierr;
781 
782   PetscFunctionBegin;
783   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
784   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
789 {
790   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
791   PetscObjectState sA, sB;
792   PetscInt        *lrows;
793   PetscInt         r, len;
794   PetscBool        cong, lch, gch;
795   PetscErrorCode   ierr;
796 
797   PetscFunctionBegin;
798   /* get locally owned rows */
799   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
800   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
801   /* fix right hand side if needed */
802   if (x && b) {
803     const PetscScalar *xx;
804     PetscScalar       *bb;
805 
806     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
807     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
809     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
810     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
811     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
812   }
813 
814   sA = mat->A->nonzerostate;
815   sB = mat->B->nonzerostate;
816 
817   if (diag != 0.0 && cong) {
818     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
819     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
820   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
821     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
822     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
823     PetscInt   nnwA, nnwB;
824     PetscBool  nnzA, nnzB;
825 
826     nnwA = aijA->nonew;
827     nnwB = aijB->nonew;
828     nnzA = aijA->keepnonzeropattern;
829     nnzB = aijB->keepnonzeropattern;
830     if (!nnzA) {
831       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
832       aijA->nonew = 0;
833     }
834     if (!nnzB) {
835       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
836       aijB->nonew = 0;
837     }
838     /* Must zero here before the next loop */
839     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
840     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
841     for (r = 0; r < len; ++r) {
842       const PetscInt row = lrows[r] + A->rmap->rstart;
843       if (row >= A->cmap->N) continue;
844       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
845     }
846     aijA->nonew = nnwA;
847     aijB->nonew = nnwB;
848   } else {
849     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
850     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
851   }
852   ierr = PetscFree(lrows);CHKERRQ(ierr);
853   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
854   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
855 
856   /* reduce nonzerostate */
857   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
858   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
859   if (gch) A->nonzerostate++;
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
864 {
865   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode    ierr;
867   PetscMPIInt       n = A->rmap->n;
868   PetscInt          i,j,r,m,len = 0;
869   PetscInt          *lrows,*owners = A->rmap->range;
870   PetscMPIInt       p = 0;
871   PetscSFNode       *rrows;
872   PetscSF           sf;
873   const PetscScalar *xx;
874   PetscScalar       *bb,*mask;
875   Vec               xmask,lmask;
876   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
877   const PetscInt    *aj, *ii,*ridx;
878   PetscScalar       *aa;
879 
880   PetscFunctionBegin;
881   /* Create SF where leaves are input rows and roots are owned rows */
882   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
883   for (r = 0; r < n; ++r) lrows[r] = -1;
884   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
885   for (r = 0; r < N; ++r) {
886     const PetscInt idx   = rows[r];
887     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
888     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
889       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
890     }
891     rrows[r].rank  = p;
892     rrows[r].index = rows[r] - owners[p];
893   }
894   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
895   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
896   /* Collect flags for rows to be zeroed */
897   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
898   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
899   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
900   /* Compress and put in row numbers */
901   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
902   /* zero diagonal part of matrix */
903   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
904   /* handle off diagonal part of matrix */
905   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
906   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
907   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
908   for (i=0; i<len; i++) bb[lrows[i]] = 1;
909   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
910   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
911   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
912   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
913   if (x && b) { /* this code is buggy when the row and column layouts don't match */
914     PetscBool cong;
915 
916     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
917     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
918     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922   }
923   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
924   /* remove zeroed rows of off diagonal matrix */
925   ii = aij->i;
926   for (i=0; i<len; i++) {
927     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
928   }
929   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
930   if (aij->compressedrow.use) {
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++) {
935       n  = ii[i+1] - ii[i];
936       aj = aij->j + ii[i];
937       aa = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           if (b) bb[*ridx] -= *aa*xx[*aj];
942           *aa = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n  = ii[i+1] - ii[i];
953       aj = aij->j + ii[i];
954       aa = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           if (b) bb[i] -= *aa*xx[*aj];
958           *aa = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965   if (x && b) {
966     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
968   }
969   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
970   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
971   ierr = PetscFree(lrows);CHKERRQ(ierr);
972 
973   /* only change matrix nonzero state if pattern was allowed to be changed */
974   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
975     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
976     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscInt       nt;
986   VecScatter     Mvctx = a->Mvctx;
987 
988   PetscFunctionBegin;
989   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
990   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
991   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
993   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012   VecScatter     Mvctx = a->Mvctx;
1013 
1014   PetscFunctionBegin;
1015   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1025   PetscErrorCode ierr;
1026 
1027   PetscFunctionBegin;
1028   /* do nondiagonal part */
1029   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1030   /* do local part */
1031   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1032   /* add partial results together */
1033   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1034   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1039 {
1040   MPI_Comm       comm;
1041   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1042   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1043   IS             Me,Notme;
1044   PetscErrorCode ierr;
1045   PetscInt       M,N,first,last,*notme,i;
1046   PetscBool      lf;
1047   PetscMPIInt    size;
1048 
1049   PetscFunctionBegin;
1050   /* Easy test: symmetric diagonal block */
1051   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1052   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1053   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1054   if (!*f) PetscFunctionReturn(0);
1055   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1056   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1057   if (size == 1) PetscFunctionReturn(0);
1058 
1059   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1060   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1061   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1062   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1063   for (i=0; i<first; i++) notme[i] = i;
1064   for (i=last; i<M; i++) notme[i-last+first] = i;
1065   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1066   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1067   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1068   Aoff = Aoffs[0];
1069   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1070   Boff = Boffs[0];
1071   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1072   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1073   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1074   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1075   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1076   ierr = PetscFree(notme);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1081 {
1082   PetscErrorCode ierr;
1083 
1084   PetscFunctionBegin;
1085   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1097   /* do local part */
1098   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1099   /* add partial results together */
1100   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1101   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 /*
1106   This only works correctly for square matrices where the subblock A->A is the
1107    diagonal block
1108 */
1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1110 {
1111   PetscErrorCode ierr;
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113 
1114   PetscFunctionBegin;
1115   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1116   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1117   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1122 {
1123   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1128   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1133 {
1134   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138 #if defined(PETSC_USE_LOG)
1139   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1140 #endif
1141   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1142   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1143   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1144   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1145 #if defined(PETSC_USE_CTABLE)
1146   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1147 #else
1148   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1149 #endif
1150   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1151   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1152   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1153   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1154   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1155   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1156 
1157   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1158   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1159 
1160   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1170 #if defined(PETSC_HAVE_CUDA)
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1172 #endif
1173 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1177 #if defined(PETSC_HAVE_ELEMENTAL)
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1179 #endif
1180 #if defined(PETSC_HAVE_SCALAPACK)
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1182 #endif
1183 #if defined(PETSC_HAVE_HYPRE)
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1186 #endif
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1192   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1193 #if defined(PETSC_HAVE_MKL_SPARSE)
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1195 #endif
1196   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1203 {
1204   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1205   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1206   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1207   const PetscInt    *garray = aij->garray;
1208   const PetscScalar *aa,*ba;
1209   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1210   PetscInt          *rowlens;
1211   PetscInt          *colidxs;
1212   PetscScalar       *matvals;
1213   PetscErrorCode    ierr;
1214 
1215   PetscFunctionBegin;
1216   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1217 
1218   M  = mat->rmap->N;
1219   N  = mat->cmap->N;
1220   m  = mat->rmap->n;
1221   rs = mat->rmap->rstart;
1222   cs = mat->cmap->rstart;
1223   nz = A->nz + B->nz;
1224 
1225   /* write matrix header */
1226   header[0] = MAT_FILE_CLASSID;
1227   header[1] = M; header[2] = N; header[3] = nz;
1228   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1229   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1230 
1231   /* fill in and store row lengths  */
1232   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1233   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1234   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1235   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1236 
1237   /* fill in and store column indices */
1238   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1239   for (cnt=0, i=0; i<m; i++) {
1240     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       colidxs[cnt++] = garray[B->j[jb]];
1243     }
1244     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1245       colidxs[cnt++] = A->j[ja] + cs;
1246     for (; jb<B->i[i+1]; jb++)
1247       colidxs[cnt++] = garray[B->j[jb]];
1248   }
1249   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1250   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1251   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1252 
1253   /* fill in and store nonzero values */
1254   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1255   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1256   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1257   for (cnt=0, i=0; i<m; i++) {
1258     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1259       if (garray[B->j[jb]] > cs) break;
1260       matvals[cnt++] = ba[jb];
1261     }
1262     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1263       matvals[cnt++] = aa[ja];
1264     for (; jb<B->i[i+1]; jb++)
1265       matvals[cnt++] = ba[jb];
1266   }
1267   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1268   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1269   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1270   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1271   ierr = PetscFree(matvals);CHKERRQ(ierr);
1272 
1273   /* write block size option to the viewer's .info file */
1274   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
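
/*
   A minimal usage sketch of the binary format written above, assuming an assembled MPIAIJ
   matrix A on PETSC_COMM_WORLD and an arbitrary example file name "matrix.dat" (error
   checking is omitted for brevity):

     PetscViewer viewer;
     Mat         B;

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);                          writes the header, row lengths, column indices and values
     PetscViewerDestroy(&viewer);

     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetType(B,MATAIJ);
     MatLoad(B,viewer);                          reads back the format produced above
     PetscViewerDestroy(&viewer);
*/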
1277 
1278 #include <petscdraw.h>
1279 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1280 {
1281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1282   PetscErrorCode    ierr;
1283   PetscMPIInt       rank = aij->rank,size = aij->size;
1284   PetscBool         isdraw,iascii,isbinary;
1285   PetscViewer       sviewer;
1286   PetscViewerFormat format;
1287 
1288   PetscFunctionBegin;
1289   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1290   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1291   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1292   if (iascii) {
1293     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1294     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1295       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1296       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1297       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1298       for (i=0; i<(PetscInt)size; i++) {
1299         nmax = PetscMax(nmax,nz[i]);
1300         nmin = PetscMin(nmin,nz[i]);
1301         navg += nz[i];
1302       }
1303       ierr = PetscFree(nz);CHKERRQ(ierr);
1304       navg = navg/size;
1305       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1306       PetscFunctionReturn(0);
1307     }
1308     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscInt *inodes=NULL;
1312 
1313       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1314       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1315       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1317       if (!inodes) {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1322                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1323       }
1324       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1328       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1329       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1330       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1331       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1332       PetscFunctionReturn(0);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount,inodelimit,*inodes;
1335       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1336       if (inodes) {
1337         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1338       } else {
1339         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1340       }
1341       PetscFunctionReturn(0);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(0);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1348       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1349     } else {
1350       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1351     }
1352     PetscFunctionReturn(0);
1353   } else if (iascii && size == 1) {
1354     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1355     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1356     PetscFunctionReturn(0);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1361     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1362     if (isnull) PetscFunctionReturn(0);
1363   }
1364 
1365   { /* assemble the entire matrix onto first processor */
1366     Mat A = NULL, Av;
1367     IS  isrow,iscol;
1368 
1369     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1370     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1371     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1372     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1373 /*  The commented-out block below shows an alternative implementation using MatCreateSubMatrices() */
1374 /*
1375     Mat *AA, A = NULL, Av;
1376     IS  isrow,iscol;
1377 
1378     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1379     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1380     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1381     if (rank == 0) {
1382        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1383        A    = AA[0];
1384        Av   = AA[0];
1385     }
1386     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1387 */
1388     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1389     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1390     /*
1391        Every process must participate in drawing the matrix since the graphics waits are
1392        synchronized across all processes that share the PetscDraw object
1393     */
1394     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1395     if (rank == 0) {
1396       if (((PetscObject)mat)->name) {
1397         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1398       }
1399       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1400     }
1401     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1402     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1403     ierr = MatDestroy(&A);CHKERRQ(ierr);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1409 {
1410   PetscErrorCode ierr;
1411   PetscBool      iascii,isdraw,issocket,isbinary;
1412 
1413   PetscFunctionBegin;
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1415   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1416   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1417   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1418   if (iascii || isdraw || isbinary || issocket) {
1419     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1425 {
1426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1427   PetscErrorCode ierr;
1428   Vec            bb1 = NULL;
1429   PetscBool      hasop;
1430 
1431   PetscFunctionBegin;
1432   if (flag == SOR_APPLY_UPPER) {
1433     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1438     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1439   }
1440 
1441   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1442     if (flag & SOR_ZERO_INITIAL_GUESS) {
1443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1444       its--;
1445     }
1446 
1447     while (its--) {
1448       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450 
1451       /* update rhs: bb1 = bb - B*x */
1452       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1453       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1454 
1455       /* local sweep */
1456       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1457     }
1458   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1459     if (flag & SOR_ZERO_INITIAL_GUESS) {
1460       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1461       its--;
1462     }
1463     while (its--) {
1464       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466 
1467       /* update rhs: bb1 = bb - B*x */
1468       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1469       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1470 
1471       /* local sweep */
1472       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1473     }
1474   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1475     if (flag & SOR_ZERO_INITIAL_GUESS) {
1476       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1477       its--;
1478     }
1479     while (its--) {
1480       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1481       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1482 
1483       /* update rhs: bb1 = bb - B*x */
1484       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1485       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1486 
1487       /* local sweep */
1488       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1489     }
1490   } else if (flag & SOR_EISENSTAT) {
1491     Vec xx1;
1492 
1493     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1494     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1495 
1496     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498     if (!mat->diag) {
1499       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1500       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1501     }
1502     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1503     if (hasop) {
1504       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1505     } else {
1506       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1507     }
1508     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1509 
1510     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1511 
1512     /* local sweep */
1513     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1514     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1515     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1516   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1517 
1518   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1519 
1520   matin->factorerrortype = mat->A->factorerrortype;
1521   PetscFunctionReturn(0);
1522 }
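
/*
   A usage sketch for the process-local SOR sweeps above, normally reached through PCSOR;
   only the SOR_LOCAL_* variants and the Eisenstat trick are supported in parallel, as the
   final error branch indicates. Assuming a KSP object ksp whose operator is this MPIAIJ
   matrix:

     PC pc;

     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);     process-local symmetric sweeps
     PCSORSetOmega(pc,1.0);
     PCSORSetIterations(pc,1,1);                          the its and lits arguments passed to MatSOR()

   or equivalently -pc_type sor -pc_sor_local_symmetric on the command line.
*/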
1523 
1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1525 {
1526   Mat            aA,aB,Aperm;
1527   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1528   PetscScalar    *aa,*ba;
1529   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1530   PetscSF        rowsf,sf;
1531   IS             parcolp = NULL;
1532   PetscBool      done;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1537   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1538   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1539   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1540 
1541   /* Invert row permutation to find out where my rows should go */
1542   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1543   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1544   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1545   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1546   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1547   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1548 
1549   /* Invert column permutation to find out where my columns should go */
1550   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1551   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1552   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1553   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1554   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1555   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1556   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1557 
1558   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1559   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1560   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1561 
1562   /* Find out where my gcols should go */
1563   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1564   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1565   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1566   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1567   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1568   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1570   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1571 
1572   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1573   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1574   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1575   for (i=0; i<m; i++) {
1576     PetscInt    row = rdest[i];
1577     PetscMPIInt rowner;
1578     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1579     for (j=ai[i]; j<ai[i+1]; j++) {
1580       PetscInt    col = cdest[aj[j]];
1581       PetscMPIInt cowner;
1582       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1583       if (rowner == cowner) dnnz[i]++;
1584       else onnz[i]++;
1585     }
1586     for (j=bi[i]; j<bi[i+1]; j++) {
1587       PetscInt    col = gcdest[bj[j]];
1588       PetscMPIInt cowner;
1589       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1590       if (rowner == cowner) dnnz[i]++;
1591       else onnz[i]++;
1592     }
1593   }
1594   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1597   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1598   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1599 
1600   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1601   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1602   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1603   for (i=0; i<m; i++) {
1604     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1605     PetscInt j0,rowlen;
1606     rowlen = ai[i+1] - ai[i];
1607     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m (the size of the scratch arrays), so insert the values in batches of at most m */
1608       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1609       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1610     }
1611     rowlen = bi[i+1] - bi[i];
1612     for (j0=j=0; j<rowlen; j0=j) {
1613       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1614       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1615     }
1616   }
1617   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1619   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1620   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1621   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1622   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1623   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1624   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1625   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1626   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1627   *B = Aperm;
1628   PetscFunctionReturn(0);
1629 }
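
/*
   A usage sketch for the permutation routine above, assuming an assembled MPIAIJ matrix A
   and parallel index sets rowperm and colperm (for example built with ISCreateGeneral())
   describing where each row and column should go:

     Mat Aperm;

     MatPermute(A,rowperm,colperm,&Aperm);       dispatches here for MATMPIAIJ
     ... use Aperm ...
     MatDestroy(&Aperm);
*/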
1630 
1631 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1632 {
1633   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1634   PetscErrorCode ierr;
1635 
1636   PetscFunctionBegin;
1637   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1638   if (ghosts) *ghosts = aij->garray;
1639   PetscFunctionReturn(0);
1640 }
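
/*
   A usage sketch for the routine above: the "ghosts" are the global column indices of the
   off-process part B (the garray), and can be used, for example, to build a ghosted vector
   that conforms to the column layout of the matrix (illustrative only):

     PetscInt       nghosts,m,n;
     const PetscInt *ghosts;
     Vec            x;

     MatGetGhosts(A,&nghosts,&ghosts);
     MatGetLocalSize(A,&m,&n);
     VecCreateGhost(PETSC_COMM_WORLD,n,PETSC_DECIDE,nghosts,ghosts,&x);
*/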
1641 
1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1643 {
1644   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1645   Mat            A    = mat->A,B = mat->B;
1646   PetscErrorCode ierr;
1647   PetscLogDouble isend[5],irecv[5];
1648 
1649   PetscFunctionBegin;
1650   info->block_size = 1.0;
1651   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1654   isend[3] = info->memory;  isend[4] = info->mallocs;
1655 
1656   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1659   isend[3] += info->memory;  isend[4] += info->mallocs;
1660   if (flag == MAT_LOCAL) {
1661     info->nz_used      = isend[0];
1662     info->nz_allocated = isend[1];
1663     info->nz_unneeded  = isend[2];
1664     info->memory       = isend[3];
1665     info->mallocs      = isend[4];
1666   } else if (flag == MAT_GLOBAL_MAX) {
1667     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1668 
1669     info->nz_used      = irecv[0];
1670     info->nz_allocated = irecv[1];
1671     info->nz_unneeded  = irecv[2];
1672     info->memory       = irecv[3];
1673     info->mallocs      = irecv[4];
1674   } else if (flag == MAT_GLOBAL_SUM) {
1675     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1676 
1677     info->nz_used      = irecv[0];
1678     info->nz_allocated = irecv[1];
1679     info->nz_unneeded  = irecv[2];
1680     info->memory       = irecv[3];
1681     info->mallocs      = irecv[4];
1682   }
1683   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1684   info->fill_ratio_needed = 0;
1685   info->factor_mallocs    = 0;
1686   PetscFunctionReturn(0);
1687 }
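
/*
   A usage sketch for the routine above, querying global storage statistics of an assembled
   matrix (the MatInfo fields are PetscLogDouble values):

     MatInfo info;

     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                 info.nz_used,info.nz_allocated,info.mallocs);
*/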
1688 
1689 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1690 {
1691   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1692   PetscErrorCode ierr;
1693 
1694   PetscFunctionBegin;
1695   switch (op) {
1696   case MAT_NEW_NONZERO_LOCATIONS:
1697   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1698   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1699   case MAT_KEEP_NONZERO_PATTERN:
1700   case MAT_NEW_NONZERO_LOCATION_ERR:
1701   case MAT_USE_INODES:
1702   case MAT_IGNORE_ZERO_ENTRIES:
1703   case MAT_FORM_EXPLICIT_TRANSPOSE:
1704     MatCheckPreallocated(A,1);
1705     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1706     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1707     break;
1708   case MAT_ROW_ORIENTED:
1709     MatCheckPreallocated(A,1);
1710     a->roworiented = flg;
1711 
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_FORCE_DIAGONAL_ENTRIES:
1716   case MAT_SORTED_FULL:
1717     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1718     break;
1719   case MAT_IGNORE_OFF_PROC_ENTRIES:
1720     a->donotstash = flg;
1721     break;
1722   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1723   case MAT_SPD:
1724   case MAT_SYMMETRIC:
1725   case MAT_STRUCTURALLY_SYMMETRIC:
1726   case MAT_HERMITIAN:
1727   case MAT_SYMMETRY_ETERNAL:
1728     break;
1729   case MAT_SUBMAT_SINGLEIS:
1730     A->submat_singleis = flg;
1731     break;
1732   case MAT_STRUCTURE_ONLY:
1733     /* The option is handled directly by MatSetOption() */
1734     break;
1735   default:
1736     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1737   }
1738   PetscFunctionReturn(0);
1739 }
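
/*
   A usage sketch for the options handled above; for example, when every process only sets
   entries in rows it owns, stashing of off-process values can be switched off before
   assembly, and new nonzero allocations can be turned into errors to catch bad preallocation:

     MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);      sets a->donotstash above
     MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);   forwarded to the A and B parts
*/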
1740 
1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1742 {
1743   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1744   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1745   PetscErrorCode ierr;
1746   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1747   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1748   PetscInt       *cmap,*idx_p;
1749 
1750   PetscFunctionBegin;
1751   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1752   mat->getrowactive = PETSC_TRUE;
1753 
1754   if (!mat->rowvalues && (idx || v)) {
1755     /*
1756         allocate enough space to hold information from the longest row.
1757     */
1758     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1759     PetscInt   max = 1,tmp;
1760     for (i=0; i<matin->rmap->n; i++) {
1761       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1762       if (max < tmp) max = tmp;
1763     }
1764     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1765   }
1766 
1767   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1768   lrow = row - rstart;
1769 
1770   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1771   if (!v)   {pvA = NULL; pvB = NULL;}
1772   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1773   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1774   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1775   nztot = nzA + nzB;
1776 
1777   cmap = mat->garray;
1778   if (v  || idx) {
1779     if (nztot) {
1780       /* Sort by increasing column numbers, assuming A and B already sorted */
1781       PetscInt imark = -1;
1782       if (v) {
1783         *v = v_p = mat->rowvalues;
1784         for (i=0; i<nzB; i++) {
1785           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1786           else break;
1787         }
1788         imark = i;
1789         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1790         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1791       }
1792       if (idx) {
1793         *idx = idx_p = mat->rowindices;
1794         if (imark > -1) {
1795           for (i=0; i<imark; i++) {
1796             idx_p[i] = cmap[cworkB[i]];
1797           }
1798         } else {
1799           for (i=0; i<nzB; i++) {
1800             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1801             else break;
1802           }
1803           imark = i;
1804         }
1805         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1806         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1807       }
1808     } else {
1809       if (idx) *idx = NULL;
1810       if (v)   *v   = NULL;
1811     }
1812   }
1813   *nz  = nztot;
1814   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1815   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1820 {
1821   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1822 
1823   PetscFunctionBegin;
1824   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1825   aij->getrowactive = PETSC_FALSE;
1826   PetscFunctionReturn(0);
1827 }
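
/*
   A usage sketch for the MatGetRow()/MatRestoreRow() pair implemented above; only locally
   owned rows may be requested, and only one row may be active at a time (the getrowactive
   flag):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       ... use the global column indices cols[] and values vals[] of this row ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/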
1828 
1829 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1830 {
1831   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1832   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1833   PetscErrorCode ierr;
1834   PetscInt       i,j,cstart = mat->cmap->rstart;
1835   PetscReal      sum = 0.0;
1836   MatScalar      *v;
1837 
1838   PetscFunctionBegin;
1839   if (aij->size == 1) {
1840     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1841   } else {
1842     if (type == NORM_FROBENIUS) {
1843       v = amat->a;
1844       for (i=0; i<amat->nz; i++) {
1845         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1846       }
1847       v = bmat->a;
1848       for (i=0; i<bmat->nz; i++) {
1849         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1850       }
1851       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1852       *norm = PetscSqrtReal(*norm);
1853       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1854     } else if (type == NORM_1) { /* max column norm */
1855       PetscReal *tmp,*tmp2;
1856       PetscInt  *jj,*garray = aij->garray;
1857       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1858       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1859       *norm = 0.0;
1860       v     = amat->a; jj = amat->j;
1861       for (j=0; j<amat->nz; j++) {
1862         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1863       }
1864       v = bmat->a; jj = bmat->j;
1865       for (j=0; j<bmat->nz; j++) {
1866         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1867       }
1868       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1869       for (j=0; j<mat->cmap->N; j++) {
1870         if (tmp2[j] > *norm) *norm = tmp2[j];
1871       }
1872       ierr = PetscFree(tmp);CHKERRQ(ierr);
1873       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1874       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1875     } else if (type == NORM_INFINITY) { /* max row norm */
1876       PetscReal ntemp = 0.0;
1877       for (j=0; j<aij->A->rmap->n; j++) {
1878         v   = amat->a + amat->i[j];
1879         sum = 0.0;
1880         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v); v++;
1882         }
1883         v = bmat->a + bmat->i[j];
1884         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1885           sum += PetscAbsScalar(*v); v++;
1886         }
1887         if (sum > ntemp) ntemp = sum;
1888       }
1889       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1890       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1891     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1892   }
1893   PetscFunctionReturn(0);
1894 }
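
/*
   A usage sketch for the norms computed above (the two-norm is not supported, as the final
   error branch indicates); the Frobenius norm is assembled as sqrt(sum_ij |a_ij|^2) from the
   local A and B parts followed by a global reduction:

     PetscReal nrm;

     MatNorm(A,NORM_FROBENIUS,&nrm);
     MatNorm(A,NORM_1,&nrm);               largest column sum of absolute values
     MatNorm(A,NORM_INFINITY,&nrm);        largest row sum of absolute values
*/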
1895 
1896 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1897 {
1898   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1899   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1900   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1901   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1902   PetscErrorCode  ierr;
1903   Mat             B,A_diag,*B_diag;
1904   const MatScalar *pbv,*bv;
1905 
1906   PetscFunctionBegin;
1907   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1908   ai = Aloc->i; aj = Aloc->j;
1909   bi = Bloc->i; bj = Bloc->j;
1910   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1911     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1912     PetscSFNode          *oloc;
1913     PETSC_UNUSED PetscSF sf;
1914 
1915     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1916     /* compute d_nnz for preallocation */
1917     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1918     for (i=0; i<ai[ma]; i++) {
1919       d_nnz[aj[i]]++;
1920     }
1921     /* compute local off-diagonal contributions */
1922     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1923     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1924     /* map those to global */
1925     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1926     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1927     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1928     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1929     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1930     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1931     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1932 
1933     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1934     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1935     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1936     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1937     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1938     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1939   } else {
1940     B    = *matout;
1941     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1942   }
1943 
1944   b           = (Mat_MPIAIJ*)B->data;
1945   A_diag      = a->A;
1946   B_diag      = &b->A;
1947   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1948   A_diag_ncol = A_diag->cmap->N;
1949   B_diag_ilen = sub_B_diag->ilen;
1950   B_diag_i    = sub_B_diag->i;
1951 
1952   /* Set ilen for diagonal of B */
1953   for (i=0; i<A_diag_ncol; i++) {
1954     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1955   }
1956 
1957   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1958      quickly (i.e., without using MatSetValues()), because all writes are local. */
1959   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1960 
1961   /* copy over the B part */
1962   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1963   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1964   pbv  = bv;
1965   row  = A->rmap->rstart;
1966   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1967   cols_tmp = cols;
1968   for (i=0; i<mb; i++) {
1969     ncol = bi[i+1]-bi[i];
1970     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1971     row++;
1972     pbv += ncol; cols_tmp += ncol;
1973   }
1974   ierr = PetscFree(cols);CHKERRQ(ierr);
1975   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1976 
1977   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1978   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1980     *matout = B;
1981   } else {
1982     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
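
/*
   A usage sketch for the transpose routine above; the three reuse modes correspond to the
   branches handled in the code:

     Mat At;

     MatTranspose(A,MAT_INITIAL_MATRIX,&At);     allocate and fill a new transpose
     MatTranspose(A,MAT_REUSE_MATRIX,&At);       refill At, which must already have the transposed pattern
     MatTranspose(A,MAT_INPLACE_MATRIX,&A);      replace A by its transpose (the MatHeaderMerge() path)
*/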
1986 
1987 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1988 {
1989   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1990   Mat            a    = aij->A,b = aij->B;
1991   PetscErrorCode ierr;
1992   PetscInt       s1,s2,s3;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1996   if (rr) {
1997     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1998     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1999     /* Overlap communication with computation. */
2000     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2001   }
2002   if (ll) {
2003     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2004     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2005     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2006   }
2007   /* scale  the diagonal block */
2008   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2009 
2010   if (rr) {
2011     /* Do a scatter end and then right scale the off-diagonal block */
2012     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2013     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2014   }
2015   PetscFunctionReturn(0);
2016 }
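
/*
   A usage sketch for the scaling above, which forms A := diag(ll) * A * diag(rr); ll is laid
   out like the rows of A and rr like its columns, and either may be NULL to skip that side:

     Vec l,r;

     MatCreateVecs(A,&r,&l);      r conforms to the columns of A, l to its rows
     ... fill l and r ...
     MatDiagonalScale(A,l,r);
*/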
2017 
2018 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2019 {
2020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2021   PetscErrorCode ierr;
2022 
2023   PetscFunctionBegin;
2024   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2029 {
2030   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2031   Mat            a,b,c,d;
2032   PetscBool      flg;
2033   PetscErrorCode ierr;
2034 
2035   PetscFunctionBegin;
2036   a = matA->A; b = matA->B;
2037   c = matB->A; d = matB->B;
2038 
2039   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2040   if (flg) {
2041     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2042   }
2043   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2048 {
2049   PetscErrorCode ierr;
2050   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2051   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2052 
2053   PetscFunctionBegin;
2054   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2055   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2056     /* Because of the column compression in the off-process part a->B,
2057        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2058        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
2059        could be provided by first uncompressing the a->B matrices
2060        and then copying the submatrices. */
2061     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2062   } else {
2063     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2064     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2065   }
2066   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2071 {
2072   PetscErrorCode ierr;
2073 
2074   PetscFunctionBegin;
2075   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 /*
2080    Computes the number of nonzeros per row needed for preallocation when X and Y
2081    have different nonzero structure.
2082 */
2083 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2084 {
2085   PetscInt       i,j,k,nzx,nzy;
2086 
2087   PetscFunctionBegin;
2088   /* Set the number of nonzeros in the new matrix */
2089   for (i=0; i<m; i++) {
2090     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2091     nzx = xi[i+1] - xi[i];
2092     nzy = yi[i+1] - yi[i];
2093     nnz[i] = 0;
2094     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2095       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2096       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2097       nnz[i]++;
2098     }
2099     for (; k<nzy; k++) nnz[i]++;
2100   }
2101   PetscFunctionReturn(0);
2102 }
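
/*
   For example, with m = 1, if row 0 of X has global columns {1,4,7} and row 0 of Y has
   global columns {2,4,9}, the merge above counts the union {1,2,4,7,9} and sets nnz[0] = 5;
   the duplicate column 4 is counted only once, via the "Skip duplicate" branch.
*/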
2103 
2104 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2105 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2106 {
2107   PetscErrorCode ierr;
2108   PetscInt       m = Y->rmap->N;
2109   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2110   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2111 
2112   PetscFunctionBegin;
2113   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2118 {
2119   PetscErrorCode ierr;
2120   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2121 
2122   PetscFunctionBegin;
2123   if (str == SAME_NONZERO_PATTERN) {
2124     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2125     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2126   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2127     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2128   } else {
2129     Mat      B;
2130     PetscInt *nnz_d,*nnz_o;
2131 
2132     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2133     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2134     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2135     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2136     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2137     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2138     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2139     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2140     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2141     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2142     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2143     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2144     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2145   }
2146   PetscFunctionReturn(0);
2147 }
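
/*
   A usage sketch for Y := Y + a*X as implemented above; the MatStructure argument selects
   the branch taken:

     MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);         fastest: applied part-by-part to A and B
     MatAXPY(Y,alpha,X,SUBSET_NONZERO_PATTERN);       the nonzeros of X are a subset of those of Y
     MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);    Y is rebuilt with the merged nonzero pattern
*/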
2148 
2149 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2150 
2151 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2152 {
2153 #if defined(PETSC_USE_COMPLEX)
2154   PetscErrorCode ierr;
2155   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2156 
2157   PetscFunctionBegin;
2158   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2159   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2160 #else
2161   PetscFunctionBegin;
2162 #endif
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2173   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2184   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2189 {
2190   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2191   PetscErrorCode    ierr;
2192   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2193   PetscScalar       *va,*vv;
2194   Vec               vB,vA;
2195   const PetscScalar *vb;
2196 
2197   PetscFunctionBegin;
2198   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2199   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2200 
2201   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2202   if (idx) {
2203     for (i=0; i<m; i++) {
2204       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2205     }
2206   }
2207 
2208   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2209   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2210   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2211 
2212   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2213   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2214   for (i=0; i<m; i++) {
2215     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2216       vv[i] = vb[i];
2217       if (idx) idx[i] = a->garray[idxb[i]];
2218     } else {
2219       vv[i] = va[i];
2220       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2221         idx[i] = a->garray[idxb[i]];
2222     }
2223   }
2224   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2225   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2226   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2227   ierr = PetscFree(idxb);CHKERRQ(ierr);
2228   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2229   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2234 {
2235   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2236   PetscInt          m = A->rmap->n,n = A->cmap->n;
2237   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2238   PetscInt          *cmap  = mat->garray;
2239   PetscInt          *diagIdx, *offdiagIdx;
2240   Vec               diagV, offdiagV;
2241   PetscScalar       *a, *diagA, *offdiagA;
2242   const PetscScalar *ba,*bav;
2243   PetscInt          r,j,col,ncols,*bi,*bj;
2244   PetscErrorCode    ierr;
2245   Mat               B = mat->B;
2246   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2247 
2248   PetscFunctionBegin;
2249   /* When one process holds the entire A and the other processes have no entries */
2250   if (A->cmap->N == n) {
2251     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2252     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2253     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2254     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2255     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2256     PetscFunctionReturn(0);
2257   } else if (n == 0) {
2258     if (m) {
2259       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2260       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2261       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2262     }
2263     PetscFunctionReturn(0);
2264   }
2265 
2266   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2267   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2268   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2269   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2270 
2271   /* Get offdiagIdx[] for implicit 0.0 */
2272   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2273   ba   = bav;
2274   bi   = b->i;
2275   bj   = b->j;
2276   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2277   for (r = 0; r < m; r++) {
2278     ncols = bi[r+1] - bi[r];
2279     if (ncols == A->cmap->N - n) { /* Brow is dense */
2280       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2281     } else { /* Brow is sparse, so we already KNOW the minimum in absolute value is 0.0 (an implicit zero exists) */
2282       offdiagA[r] = 0.0;
2283 
2284       /* Find first hole in the cmap */
2285       for (j=0; j<ncols; j++) {
2286         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2287         if (col > j && j < cstart) {
2288           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2289           break;
2290         } else if (col > j + n && j >= cstart) {
2291           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2292           break;
2293         }
2294       }
2295       if (j == ncols && ncols < A->cmap->N - n) {
2296         /* a hole is outside compressed Bcols */
2297         if (ncols == 0) {
2298           if (cstart) {
2299             offdiagIdx[r] = 0;
2300           } else offdiagIdx[r] = cend;
2301         } else { /* ncols > 0 */
2302           offdiagIdx[r] = cmap[ncols-1] + 1;
2303           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2304         }
2305       }
2306     }
2307 
2308     for (j=0; j<ncols; j++) {
2309       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2310       ba++; bj++;
2311     }
2312   }
2313 
2314   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2315   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2316   for (r = 0; r < m; ++r) {
2317     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2318       a[r]   = diagA[r];
2319       if (idx) idx[r] = cstart + diagIdx[r];
2320     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2321       a[r] = diagA[r];
2322       if (idx) {
2323         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2324           idx[r] = cstart + diagIdx[r];
2325         } else idx[r] = offdiagIdx[r];
2326       }
2327     } else {
2328       a[r]   = offdiagA[r];
2329       if (idx) idx[r] = offdiagIdx[r];
2330     }
2331   }
2332   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2333   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2334   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2335   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2336   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2337   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2338   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2343 {
2344   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2345   PetscInt          m = A->rmap->n,n = A->cmap->n;
2346   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2347   PetscInt          *cmap  = mat->garray;
2348   PetscInt          *diagIdx, *offdiagIdx;
2349   Vec               diagV, offdiagV;
2350   PetscScalar       *a, *diagA, *offdiagA;
2351   const PetscScalar *ba,*bav;
2352   PetscInt          r,j,col,ncols,*bi,*bj;
2353   PetscErrorCode    ierr;
2354   Mat               B = mat->B;
2355   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2356 
2357   PetscFunctionBegin;
2358   /* When one process holds the entire A and the other processes have no entries */
2359   if (A->cmap->N == n) {
2360     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2361     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2362     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2363     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2364     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2365     PetscFunctionReturn(0);
2366   } else if (n == 0) {
2367     if (m) {
2368       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2369       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2370       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2371     }
2372     PetscFunctionReturn(0);
2373   }
2374 
2375   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2376   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2377   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2378   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2379 
2380   /* Get offdiagIdx[] for implicit 0.0 */
2381   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2382   ba   = bav;
2383   bi   = b->i;
2384   bj   = b->j;
2385   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2386   for (r = 0; r < m; r++) {
2387     ncols = bi[r+1] - bi[r];
2388     if (ncols == A->cmap->N - n) { /* Brow is dense */
2389       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2390     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (an implicit zero exists) */
2391       offdiagA[r] = 0.0;
2392 
2393       /* Find first hole in the cmap */
2394       for (j=0; j<ncols; j++) {
2395         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2396         if (col > j && j < cstart) {
2397           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2398           break;
2399         } else if (col > j + n && j >= cstart) {
2400           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2401           break;
2402         }
2403       }
2404       if (j == ncols && ncols < A->cmap->N - n) {
2405         /* a hole is outside compressed Bcols */
2406         if (ncols == 0) {
2407           if (cstart) {
2408             offdiagIdx[r] = 0;
2409           } else offdiagIdx[r] = cend;
2410         } else { /* ncols > 0 */
2411           offdiagIdx[r] = cmap[ncols-1] + 1;
2412           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2413         }
2414       }
2415     }
2416 
2417     for (j=0; j<ncols; j++) {
2418       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2419       ba++; bj++;
2420     }
2421   }
2422 
2423   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2424   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2425   for (r = 0; r < m; ++r) {
2426     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2427       a[r]   = diagA[r];
2428       if (idx) idx[r] = cstart + diagIdx[r];
2429     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2430       a[r] = diagA[r];
2431       if (idx) {
2432         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2433           idx[r] = cstart + diagIdx[r];
2434         } else idx[r] = offdiagIdx[r];
2435       }
2436     } else {
2437       a[r]   = offdiagA[r];
2438       if (idx) idx[r] = offdiagIdx[r];
2439     }
2440   }
2441   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2442   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2443   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2444   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2445   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2446   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2447   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2452 {
2453   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2454   PetscInt          m = A->rmap->n,n = A->cmap->n;
2455   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2456   PetscInt          *cmap  = mat->garray;
2457   PetscInt          *diagIdx, *offdiagIdx;
2458   Vec               diagV, offdiagV;
2459   PetscScalar       *a, *diagA, *offdiagA;
2460   const PetscScalar *ba,*bav;
2461   PetscInt          r,j,col,ncols,*bi,*bj;
2462   PetscErrorCode    ierr;
2463   Mat               B = mat->B;
2464   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2465 
2466   PetscFunctionBegin;
2467   /* When one process holds the entire A and the other processes have no entries */
2468   if (A->cmap->N == n) {
2469     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2470     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2471     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2472     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2473     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2474     PetscFunctionReturn(0);
2475   } else if (n == 0) {
2476     if (m) {
2477       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2478       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2479       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2480     }
2481     PetscFunctionReturn(0);
2482   }
2483 
2484   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2485   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2486   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2487   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2488 
2489   /* Get offdiagIdx[] for implicit 0.0 */
2490   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2491   ba   = bav;
2492   bi   = b->i;
2493   bj   = b->j;
2494   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r+1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2499     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher (an implicit zero exists) */
2500       offdiagA[r] = 0.0;
2501 
2502       /* Find first hole in the cmap */
2503       for (j=0; j<ncols; j++) {
2504         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2505         if (col > j && j < cstart) {
2506           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2507           break;
2508         } else if (col > j + n && j >= cstart) {
2509           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2510           break;
2511         }
2512       }
2513       if (j == ncols && ncols < A->cmap->N - n) {
2514         /* a hole is outside compressed Bcols */
2515         if (ncols == 0) {
2516           if (cstart) {
2517             offdiagIdx[r] = 0;
2518           } else offdiagIdx[r] = cend;
2519         } else { /* ncols > 0 */
2520           offdiagIdx[r] = cmap[ncols-1] + 1;
2521           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2522         }
2523       }
2524     }
2525 
2526     for (j=0; j<ncols; j++) {
2527       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2528       ba++; bj++;
2529     }
2530   }
2531 
2532   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2533   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2534   for (r = 0; r < m; ++r) {
2535     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2536       a[r] = diagA[r];
2537       if (idx) idx[r] = cstart + diagIdx[r];
2538     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2539       a[r] = diagA[r];
2540       if (idx) {
2541         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2542           idx[r] = cstart + diagIdx[r];
2543         } else idx[r] = offdiagIdx[r];
2544       }
2545     } else {
2546       a[r] = offdiagA[r];
2547       if (idx) idx[r] = offdiagIdx[r];
2548     }
2549   }
2550   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2551   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2552   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2553   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2554   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2555   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2556   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
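/*
   Illustrative usage sketch for the row-maximum routine above (not taken from a PETSc example;
   A is assumed to be an assembled MATMPIAIJ matrix with m local rows). On return v[r] holds the
   maximum entry of row r (implicit zeros in the off-diagonal block included) and idx[r] its
   global column index.

     Vec      v;
     PetscInt *idx;

     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMax(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/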
2559 
2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2561 {
2562   PetscErrorCode ierr;
2563   Mat            *dummy;
2564 
2565   PetscFunctionBegin;
2566   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2567   *newmat = *dummy;
2568   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2573 {
2574   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2579   A->factorerrortype = a->A->factorerrortype;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2584 {
2585   PetscErrorCode ierr;
2586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2587 
2588   PetscFunctionBegin;
2589   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2590   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2591   if (x->assembled) {
2592     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2593   } else {
2594     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2595   }
2596   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2597   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2602 {
2603   PetscFunctionBegin;
2604   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2605   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 /*@
2610    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2611 
2612    Collective on Mat
2613 
2614    Input Parameters:
2615 +    A - the matrix
2616 -    sc - PETSC_TRUE indicates use of the scalable algorithm (the default is to not use it)
2617 
2618    Level: advanced
2619 
2620 @*/
2621 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2622 {
2623   PetscErrorCode       ierr;
2624 
2625   PetscFunctionBegin;
2626   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2627   PetscFunctionReturn(0);
2628 }
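/*
   Illustrative usage sketch (not taken from a PETSc example; A is assumed to be a MATMPIAIJ
   matrix, and nis, is and ov are placeholders for the caller's overlap request). Subsequent
   calls to MatIncreaseOverlap() then use MatIncreaseOverlap_MPIAIJ_Scalable(); the same
   behaviour can also be requested at runtime with -mat_increase_overlap_scalable.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);
*/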
2629 
2630 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2631 {
2632   PetscErrorCode       ierr;
2633   PetscBool            sc = PETSC_FALSE,flg;
2634 
2635   PetscFunctionBegin;
2636   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2637   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2638   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2639   if (flg) {
2640     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2641   }
2642   ierr = PetscOptionsTail();CHKERRQ(ierr);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2647 {
2648   PetscErrorCode ierr;
2649   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2650   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2651 
2652   PetscFunctionBegin;
2653   if (!Y->preallocated) {
2654     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2655   } else if (!aij->nz) {
2656     PetscInt nonew = aij->nonew;
2657     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2658     aij->nonew = nonew;
2659   }
2660   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
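/*
   Minimal usage sketch for the shift above (not taken from a PETSc example; Y is assumed to be a
   MATMPIAIJ matrix), adding the scalar to every diagonal entry. If Y has not been preallocated,
   the routine above first performs a minimal preallocation of one nonzero per row in the diagonal
   block before calling MatShift_Basic().

     ierr = MatShift(Y,(PetscScalar)2.0);CHKERRQ(ierr);
*/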
2663 
2664 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2665 {
2666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2667   PetscErrorCode ierr;
2668 
2669   PetscFunctionBegin;
2670   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2671   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2672   if (d) {
2673     PetscInt rstart;
2674     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2675     *d += rstart;
2676 
2677   }
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2682 {
2683   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2684   PetscErrorCode ierr;
2685 
2686   PetscFunctionBegin;
2687   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 /* -------------------------------------------------------------------*/
2692 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2693                                        MatGetRow_MPIAIJ,
2694                                        MatRestoreRow_MPIAIJ,
2695                                        MatMult_MPIAIJ,
2696                                 /* 4*/ MatMultAdd_MPIAIJ,
2697                                        MatMultTranspose_MPIAIJ,
2698                                        MatMultTransposeAdd_MPIAIJ,
2699                                        NULL,
2700                                        NULL,
2701                                        NULL,
2702                                 /*10*/ NULL,
2703                                        NULL,
2704                                        NULL,
2705                                        MatSOR_MPIAIJ,
2706                                        MatTranspose_MPIAIJ,
2707                                 /*15*/ MatGetInfo_MPIAIJ,
2708                                        MatEqual_MPIAIJ,
2709                                        MatGetDiagonal_MPIAIJ,
2710                                        MatDiagonalScale_MPIAIJ,
2711                                        MatNorm_MPIAIJ,
2712                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2713                                        MatAssemblyEnd_MPIAIJ,
2714                                        MatSetOption_MPIAIJ,
2715                                        MatZeroEntries_MPIAIJ,
2716                                 /*24*/ MatZeroRows_MPIAIJ,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                 /*29*/ MatSetUp_MPIAIJ,
2722                                        NULL,
2723                                        NULL,
2724                                        MatGetDiagonalBlock_MPIAIJ,
2725                                        NULL,
2726                                 /*34*/ MatDuplicate_MPIAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                 /*39*/ MatAXPY_MPIAIJ,
2732                                        MatCreateSubMatrices_MPIAIJ,
2733                                        MatIncreaseOverlap_MPIAIJ,
2734                                        MatGetValues_MPIAIJ,
2735                                        MatCopy_MPIAIJ,
2736                                 /*44*/ MatGetRowMax_MPIAIJ,
2737                                        MatScale_MPIAIJ,
2738                                        MatShift_MPIAIJ,
2739                                        MatDiagonalSet_MPIAIJ,
2740                                        MatZeroRowsColumns_MPIAIJ,
2741                                 /*49*/ MatSetRandom_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2747                                        NULL,
2748                                        MatSetUnfactored_MPIAIJ,
2749                                        MatPermute_MPIAIJ,
2750                                        NULL,
2751                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2752                                        MatDestroy_MPIAIJ,
2753                                        MatView_MPIAIJ,
2754                                        NULL,
2755                                        NULL,
2756                                 /*64*/ NULL,
2757                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2762                                        MatGetRowMinAbs_MPIAIJ,
2763                                        NULL,
2764                                        NULL,
2765                                        NULL,
2766                                        NULL,
2767                                 /*75*/ MatFDColoringApply_AIJ,
2768                                        MatSetFromOptions_MPIAIJ,
2769                                        NULL,
2770                                        NULL,
2771                                        MatFindZeroDiagonals_MPIAIJ,
2772                                 /*80*/ NULL,
2773                                        NULL,
2774                                        NULL,
2775                                 /*83*/ MatLoad_MPIAIJ,
2776                                        MatIsSymmetric_MPIAIJ,
2777                                        NULL,
2778                                        NULL,
2779                                        NULL,
2780                                        NULL,
2781                                 /*89*/ NULL,
2782                                        NULL,
2783                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2787                                        NULL,
2788                                        NULL,
2789                                        NULL,
2790                                        MatBindToCPU_MPIAIJ,
2791                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2792                                        NULL,
2793                                        NULL,
2794                                        MatConjugate_MPIAIJ,
2795                                        NULL,
2796                                 /*104*/MatSetValuesRow_MPIAIJ,
2797                                        MatRealPart_MPIAIJ,
2798                                        MatImaginaryPart_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                 /*109*/NULL,
2802                                        NULL,
2803                                        MatGetRowMin_MPIAIJ,
2804                                        NULL,
2805                                        MatMissingDiagonal_MPIAIJ,
2806                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2807                                        NULL,
2808                                        MatGetGhosts_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                        MatGetMultiProcBlock_MPIAIJ,
2816                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2817                                        MatGetColumnReductions_MPIAIJ,
2818                                        MatInvertBlockDiagonal_MPIAIJ,
2819                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2820                                        MatCreateSubMatricesMPI_MPIAIJ,
2821                                 /*129*/NULL,
2822                                        NULL,
2823                                        NULL,
2824                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2825                                        NULL,
2826                                 /*134*/NULL,
2827                                        NULL,
2828                                        NULL,
2829                                        NULL,
2830                                        NULL,
2831                                 /*139*/MatSetBlockSizes_MPIAIJ,
2832                                        NULL,
2833                                        NULL,
2834                                        MatFDColoringSetUp_MPIXAIJ,
2835                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2836                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2837                                 /*145*/NULL,
2838                                        NULL,
2839                                        NULL
2840 };
2841 
2842 /* ----------------------------------------------------------------------------------------*/
2843 
2844 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2851   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2856 {
2857   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2858   PetscErrorCode ierr;
2859 
2860   PetscFunctionBegin;
2861   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2862   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2867 {
2868   Mat_MPIAIJ     *b;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    size;
2871 
2872   PetscFunctionBegin;
2873   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2874   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2875   b = (Mat_MPIAIJ*)B->data;
2876 
2877 #if defined(PETSC_USE_CTABLE)
2878   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2879 #else
2880   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2881 #endif
2882   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2883   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2884   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2885 
2886   /* Because B may have been resized, we simply destroy it and create a new one each time */
2887   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2888   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2889   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2890   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2891   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2892   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2893   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2894 
2895   if (!B->preallocated) {
2896     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2897     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2898     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2899     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2900     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2901   }
2902 
2903   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2904   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2905   B->preallocated  = PETSC_TRUE;
2906   B->was_assembled = PETSC_FALSE;
2907   B->assembled     = PETSC_FALSE;
2908   PetscFunctionReturn(0);
2909 }
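/*
   Illustrative preallocation sketch (the stencil-like counts and the local size m are assumptions
   for documentation purposes, not taken from a PETSc example). d_nz/o_nz give a single upper bound
   per local row for the diagonal and off-diagonal blocks; d_nnz/o_nnz may instead give per-row bounds.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/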
2910 
2911 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2912 {
2913   Mat_MPIAIJ     *b;
2914   PetscErrorCode ierr;
2915 
2916   PetscFunctionBegin;
2917   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2918   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2919   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2920   b = (Mat_MPIAIJ*)B->data;
2921 
2922 #if defined(PETSC_USE_CTABLE)
2923   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2924 #else
2925   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2926 #endif
2927   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2928   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2929   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2930 
2931   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2932   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2933   B->preallocated  = PETSC_TRUE;
2934   B->was_assembled = PETSC_FALSE;
2935   B->assembled = PETSC_FALSE;
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2940 {
2941   Mat            mat;
2942   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2943   PetscErrorCode ierr;
2944 
2945   PetscFunctionBegin;
2946   *newmat = NULL;
2947   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2948   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2949   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2950   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2951   a       = (Mat_MPIAIJ*)mat->data;
2952 
2953   mat->factortype   = matin->factortype;
2954   mat->assembled    = matin->assembled;
2955   mat->insertmode   = NOT_SET_VALUES;
2956   mat->preallocated = matin->preallocated;
2957 
2958   a->size         = oldmat->size;
2959   a->rank         = oldmat->rank;
2960   a->donotstash   = oldmat->donotstash;
2961   a->roworiented  = oldmat->roworiented;
2962   a->rowindices   = NULL;
2963   a->rowvalues    = NULL;
2964   a->getrowactive = PETSC_FALSE;
2965 
2966   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2967   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2968 
2969   if (oldmat->colmap) {
2970 #if defined(PETSC_USE_CTABLE)
2971     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2972 #else
2973     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2974     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2975     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2976 #endif
2977   } else a->colmap = NULL;
2978   if (oldmat->garray) {
2979     PetscInt len;
2980     len  = oldmat->B->cmap->n;
2981     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2982     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2983     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2984   } else a->garray = NULL;
2985 
2986   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2987      in fact, MatDuplicate() only requires the matrix to be preallocated.
2988      This may happen, for example, inside DMCreateMatrix_Shell() */
2989   if (oldmat->lvec) {
2990     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2991     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2992   }
2993   if (oldmat->Mvctx) {
2994     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2995     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2996   }
2997   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2998   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2999   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3000   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3001   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3002   *newmat = mat;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3007 {
3008   PetscBool      isbinary, ishdf5;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3013   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3014   /* force binary viewer to load .info file if it has not yet done so */
3015   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3016   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3017   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3018   if (isbinary) {
3019     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3020   } else if (ishdf5) {
3021 #if defined(PETSC_HAVE_HDF5)
3022     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3023 #else
3024     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3025 #endif
3026   } else {
3027     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3028   }
3029   PetscFunctionReturn(0);
3030 }
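/*
   Illustrative load sketch (the file name is a placeholder, not taken from a PETSc example):
   open a binary viewer and let MatLoad() dispatch to MatLoad_MPIAIJ_Binary() below.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/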
3031 
3032 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3033 {
3034   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3035   PetscInt       *rowidxs,*colidxs;
3036   PetscScalar    *matvals;
3037   PetscErrorCode ierr;
3038 
3039   PetscFunctionBegin;
3040   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3041 
3042   /* read in matrix header */
3043   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
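  /* The header is assumed to follow the standard PETSc binary matrix layout:
     header[0] = MAT_FILE_CLASSID, header[1] = number of rows M, header[2] = number of columns N,
     header[3] = total number of nonzeros nz; the file then contains the M row lengths, the nz
     column indices and the nz values, which are read below */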
3044   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3045   M  = header[1]; N = header[2]; nz = header[3];
3046   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3047   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3048   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3049 
3050   /* set block sizes from the viewer's .info file */
3051   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3052   /* set global sizes if not set already */
3053   if (mat->rmap->N < 0) mat->rmap->N = M;
3054   if (mat->cmap->N < 0) mat->cmap->N = N;
3055   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3056   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3057 
3058   /* check if the matrix sizes are correct */
3059   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3060   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3061 
3062   /* read in row lengths and build row indices */
3063   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3064   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3065   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3066   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3067   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3068   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3069   /* read in column indices and matrix values */
3070   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3071   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3072   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3073   /* store matrix indices and values */
3074   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3075   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3076   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3077   PetscFunctionReturn(0);
3078 }
3079 
3080 /* Not scalable because of ISAllGather() unless getting all columns. */
3081 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3082 {
3083   PetscErrorCode ierr;
3084   IS             iscol_local;
3085   PetscBool      isstride;
3086   PetscMPIInt    lisstride=0,gisstride;
3087 
3088   PetscFunctionBegin;
3089   /* check if we are grabbing all columns */
3090   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3091 
3092   if (isstride) {
3093     PetscInt  start,len,mstart,mlen;
3094     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3095     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3096     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3097     if (mstart == start && mlen-mstart == len) lisstride = 1;
3098   }
3099 
3100   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3101   if (gisstride) {
3102     PetscInt N;
3103     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3104     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3105     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3106     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3107   } else {
3108     PetscInt cbs;
3109     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3110     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3111     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3112   }
3113 
3114   *isseq = iscol_local;
3115   PetscFunctionReturn(0);
3116 }
3117 
3118 /*
3119  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3120  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3121 
3122  Input Parameters:
3123    mat - matrix
3124    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3125            i.e., mat->rstart <= isrow[i] < mat->rend
3126    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3127            i.e., mat->cstart <= iscol[i] < mat->cend
3128  Output Parameters:
3129    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3130    iscol_o - sequential column index set for retrieving mat->B
3131    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3132  */
3133 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3134 {
3135   PetscErrorCode ierr;
3136   Vec            x,cmap;
3137   const PetscInt *is_idx;
3138   PetscScalar    *xarray,*cmaparray;
3139   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3140   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3141   Mat            B=a->B;
3142   Vec            lvec=a->lvec,lcmap;
3143   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3144   MPI_Comm       comm;
3145   VecScatter     Mvctx=a->Mvctx;
3146 
3147   PetscFunctionBegin;
3148   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3149   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3150 
3151   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3152   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3153   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3154   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3155   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3156 
3157   /* Get start indices */
3158   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3159   isstart -= ncols;
3160   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3161 
3162   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3163   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3164   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3165   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3166   for (i=0; i<ncols; i++) {
3167     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3168     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3169     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3170   }
3171   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3172   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3173   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3174 
3175   /* Get iscol_d */
3176   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3177   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3178   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3179 
3180   /* Get isrow_d */
3181   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3182   rstart = mat->rmap->rstart;
3183   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3184   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3185   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3186   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3187 
3188   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3189   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3190   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3191 
3192   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3193   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3194   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3195 
3196   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3197 
3198   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3199   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3200 
3201   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3202   /* off-process column indices */
3203   count = 0;
3204   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3205   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3206 
3207   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3208   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3209   for (i=0; i<Bn; i++) {
3210     if (PetscRealPart(xarray[i]) > -1.0) {
3211       idx[count]     = i;                   /* local column index in off-diagonal part B */
3212       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3213       count++;
3214     }
3215   }
3216   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3217   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3218 
3219   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3220   /* cannot ensure iscol_o has same blocksize as iscol! */
3221 
3222   ierr = PetscFree(idx);CHKERRQ(ierr);
3223   *garray = cmap1;
3224 
3225   ierr = VecDestroy(&x);CHKERRQ(ierr);
3226   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3227   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 /* isrow and iscol have the same processor distribution as mat; output *submat is a submatrix of the local mat */
3232 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3233 {
3234   PetscErrorCode ierr;
3235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3236   Mat            M = NULL;
3237   MPI_Comm       comm;
3238   IS             iscol_d,isrow_d,iscol_o;
3239   Mat            Asub = NULL,Bsub = NULL;
3240   PetscInt       n;
3241 
3242   PetscFunctionBegin;
3243   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3244 
3245   if (call == MAT_REUSE_MATRIX) {
3246     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3247     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3248     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3249 
3250     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3251     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3252 
3253     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3254     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3255 
3256     /* Update diagonal and off-diagonal portions of submat */
3257     asub = (Mat_MPIAIJ*)(*submat)->data;
3258     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3259     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3260     if (n) {
3261       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3262     }
3263     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3265   } else { /* call == MAT_INITIAL_MATRIX */
3266   } else { /* call == MAT_INITIAL_MATRIX) */
3267     const PetscInt *garray;
3268     PetscInt        BsubN;
3269 
3270     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3271     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3272 
3273     /* Create local submatrices Asub and Bsub */
3274     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3275     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3276 
3277     /* Create submatrix M */
3278     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3279 
3280     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3281     asub = (Mat_MPIAIJ*)M->data;
3282 
3283     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3284     n = asub->B->cmap->N;
3285     if (BsubN > n) {
3286       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3287       const PetscInt *idx;
3288       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3289       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3290 
3291       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3292       j = 0;
3293       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3294       for (i=0; i<n; i++) {
3295         if (j >= BsubN) break;
3296         while (subgarray[i] > garray[j]) j++;
3297 
3298         if (subgarray[i] == garray[j]) {
3299           idx_new[i] = idx[j++];
3300         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3301       }
3302       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3303 
3304       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3305       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3306 
3307     } else if (BsubN < n) {
3308       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3309     }
3310 
3311     ierr = PetscFree(garray);CHKERRQ(ierr);
3312     *submat = M;
3313 
3314     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3315     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3316     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3317 
3318     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3319     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3320 
3321     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3322     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3323   }
3324   PetscFunctionReturn(0);
3325 }
3326 
3327 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3328 {
3329   PetscErrorCode ierr;
3330   IS             iscol_local=NULL,isrow_d;
3331   PetscInt       csize;
3332   PetscInt       n,i,j,start,end;
3333   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3334   MPI_Comm       comm;
3335 
3336   PetscFunctionBegin;
3337   /* If isrow has same processor distribution as mat,
3338      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3339   if (call == MAT_REUSE_MATRIX) {
3340     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3341     if (isrow_d) {
3342       sameRowDist  = PETSC_TRUE;
3343       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3344     } else {
3345       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3346       if (iscol_local) {
3347         sameRowDist  = PETSC_TRUE;
3348         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3349       }
3350     }
3351   } else {
3352     /* Check if isrow has same processor distribution as mat */
3353     sameDist[0] = PETSC_FALSE;
3354     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3355     if (!n) {
3356       sameDist[0] = PETSC_TRUE;
3357     } else {
3358       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3359       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3360       if (i >= start && j < end) {
3361         sameDist[0] = PETSC_TRUE;
3362       }
3363     }
3364 
3365     /* Check if iscol has same processor distribution as mat */
3366     sameDist[1] = PETSC_FALSE;
3367     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3368     if (!n) {
3369       sameDist[1] = PETSC_TRUE;
3370     } else {
3371       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3372       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3373       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3374     }
3375 
3376     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3377     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3378     sameRowDist = tsameDist[0];
3379   }
3380 
3381   if (sameRowDist) {
3382     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3383       /* isrow and iscol have same processor distribution as mat */
3384       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3385       PetscFunctionReturn(0);
3386     } else { /* sameRowDist */
3387       /* isrow has same processor distribution as mat */
3388       if (call == MAT_INITIAL_MATRIX) {
3389         PetscBool sorted;
3390         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3391         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3392         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3393         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3394 
3395         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3396         if (sorted) {
3397           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3398           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3399           PetscFunctionReturn(0);
3400         }
3401       } else { /* call == MAT_REUSE_MATRIX */
3402         IS iscol_sub;
3403         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3404         if (iscol_sub) {
3405           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3406           PetscFunctionReturn(0);
3407         }
3408       }
3409     }
3410   }
3411 
3412   /* General case: iscol -> iscol_local which has global size of iscol */
3413   if (call == MAT_REUSE_MATRIX) {
3414     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3415     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3416   } else {
3417     if (!iscol_local) {
3418       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3419     }
3420   }
3421 
3422   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3423   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3424 
3425   if (call == MAT_INITIAL_MATRIX) {
3426     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3427     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3428   }
3429   PetscFunctionReturn(0);
3430 }
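/*
   Illustrative usage sketch of the extraction above through the public interface (not taken from
   a PETSc example; A is assumed to be a MATMPIAIJ matrix and isrow/iscol index sets describing the
   wanted rows and columns). The second call reuses the index sets and sequential submatrix that
   the routines above compose onto B.

     Mat B;

     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
*/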
3431 
3432 /*@C
3433      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3434          and "off-diagonal" parts of the matrix in CSR format.
3435 
3436    Collective
3437 
3438    Input Parameters:
3439 +  comm - MPI communicator
3440 .  A - "diagonal" portion of matrix
3441 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3442 -  garray - global index of B columns
3443 
3444    Output Parameter:
3445 .   mat - the matrix, with input A as its local diagonal matrix
3446    Level: advanced
3447 
3448    Notes:
3449        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3450        A becomes part of the output mat and B is destroyed by this routine, so the user must not use A or B afterwards.
3451 
3452 .seealso: MatCreateMPIAIJWithSplitArrays()
3453 @*/
3454 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3455 {
3456   PetscErrorCode    ierr;
3457   Mat_MPIAIJ        *maij;
3458   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3459   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3460   const PetscScalar *oa;
3461   Mat               Bnew;
3462   PetscInt          m,n,N;
3463 
3464   PetscFunctionBegin;
3465   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3466   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3467   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3468   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3469   /* The check below is removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3470   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3471 
3472   /* Get global columns of mat */
3473   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3474 
3475   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3476   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3477   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3478   maij = (Mat_MPIAIJ*)(*mat)->data;
3479 
3480   (*mat)->preallocated = PETSC_TRUE;
3481 
3482   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3483   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3484 
3485   /* Set A as diagonal portion of *mat */
3486   maij->A = A;
3487 
3488   nz = oi[m];
3489   for (i=0; i<nz; i++) {
3490     col   = oj[i];
3491     oj[i] = garray[col];
3492   }
3493 
3494   /* Set Bnew as off-diagonal portion of *mat */
3495   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3496   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3497   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3498   bnew        = (Mat_SeqAIJ*)Bnew->data;
3499   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3500   maij->B     = Bnew;
3501 
3502   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3503 
3504   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3505   b->free_a       = PETSC_FALSE;
3506   b->free_ij      = PETSC_FALSE;
3507   ierr = MatDestroy(&B);CHKERRQ(ierr);
3508 
3509   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3510   bnew->free_a       = PETSC_TRUE;
3511   bnew->free_ij      = PETSC_TRUE;
3512 
3513   /* condense columns of maij->B */
3514   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3515   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3516   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3517   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3518   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
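/*
   Illustrative usage sketch (not taken from a PETSc example; Asub, Bsub and garray are assumed to
   come from calls such as ISGetSeqIS_SameColDist_Private() and MatCreateSubMatrix_SeqAIJ() above).
   Both sequential matrices are consumed: Asub becomes the diagonal block of M and Bsub is destroyed,
   so neither may be used after the call.

     Mat M;

     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/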
3521 
3522 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3523 
3524 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3525 {
3526   PetscErrorCode ierr;
3527   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3528   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3529   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3530   Mat            M,Msub,B=a->B;
3531   MatScalar      *aa;
3532   Mat_SeqAIJ     *aij;
3533   PetscInt       *garray = a->garray,*colsub,Ncols;
3534   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3535   IS             iscol_sub,iscmap;
3536   const PetscInt *is_idx,*cmap;
3537   PetscBool      allcolumns=PETSC_FALSE;
3538   MPI_Comm       comm;
3539 
3540   PetscFunctionBegin;
3541   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3542   if (call == MAT_REUSE_MATRIX) {
3543     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3544     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3545     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3546 
3547     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3548     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3549 
3550     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3551     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3552 
3553     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3554 
3555   } else { /* call == MAT_INITIAL_MATRIX */
3556     PetscBool flg;
3557 
3558     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3559     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3560 
3561     /* (1) iscol -> nonscalable iscol_local */
3562     /* Check for special case: each processor gets entire matrix columns */
3563     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3564     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3565     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3566     if (allcolumns) {
3567       iscol_sub = iscol_local;
3568       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3569       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3570 
3571     } else {
3572       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3573       PetscInt *idx,*cmap1,k;
3574       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3575       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3576       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3577       count = 0;
3578       k     = 0;
3579       for (i=0; i<Ncols; i++) {
3580         j = is_idx[i];
3581         if (j >= cstart && j < cend) {
3582           /* diagonal part of mat */
3583           idx[count]     = j;
3584           cmap1[count++] = i; /* column index in submat */
3585         } else if (Bn) {
3586           /* off-diagonal part of mat */
3587           if (j == garray[k]) {
3588             idx[count]     = j;
3589             cmap1[count++] = i;  /* column index in submat */
3590           } else if (j > garray[k]) {
3591             while (j > garray[k] && k < Bn-1) k++;
3592             if (j == garray[k]) {
3593               idx[count]     = j;
3594               cmap1[count++] = i; /* column index in submat */
3595             }
3596           }
3597         }
3598       }
3599       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3600 
3601       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3602       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3603       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3604 
3605       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3606     }
3607 
3608     /* (3) Create sequential Msub */
3609     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3610   }
3611 
3612   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3613   aij  = (Mat_SeqAIJ*)(Msub)->data;
3614   ii   = aij->i;
3615   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3616 
3617   /*
3618       m - number of local rows
3619       Ncols - number of columns (same on all processors)
3620       rstart - first row in new global matrix generated
3621   */
3622   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3623 
3624   if (call == MAT_INITIAL_MATRIX) {
3625     /* (4) Create parallel newmat */
3626     PetscMPIInt    rank,size;
3627     PetscInt       csize;
3628 
3629     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3630     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3631 
3632     /*
3633         Determine the number of non-zeros in the diagonal and off-diagonal
3634         portions of the matrix in order to do correct preallocation
3635     */
3636 
3637     /* first get start and end of "diagonal" columns */
3638     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3639     if (csize == PETSC_DECIDE) {
3640       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3641       if (mglobal == Ncols) { /* square matrix */
3642         nlocal = m;
3643       } else {
3644         nlocal = Ncols/size + ((Ncols % size) > rank);
3645       }
3646     } else {
3647       nlocal = csize;
3648     }
3649     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3650     rstart = rend - nlocal;
3651     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3652 
3653     /* next, compute all the lengths */
3654     jj    = aij->j;
3655     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3656     olens = dlens + m;
3657     for (i=0; i<m; i++) {
3658       jend = ii[i+1] - ii[i];
3659       olen = 0;
3660       dlen = 0;
3661       for (j=0; j<jend; j++) {
3662         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3663         else dlen++;
3664         jj++;
3665       }
3666       olens[i] = olen;
3667       dlens[i] = dlen;
3668     }
3669 
3670     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3671     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3672 
3673     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3674     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3675     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3676     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3677     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3678     ierr = PetscFree(dlens);CHKERRQ(ierr);
3679 
3680   } else { /* call == MAT_REUSE_MATRIX */
3681     M    = *newmat;
3682     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3683     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3684     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3685     /*
3686          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3687        rather than the slower MatSetValues().
3688     */
3689     M->was_assembled = PETSC_TRUE;
3690     M->assembled     = PETSC_FALSE;
3691   }
3692 
3693   /* (5) Set values of Msub to *newmat */
3694   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3695   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3696 
3697   jj   = aij->j;
3698   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3699   for (i=0; i<m; i++) {
3700     row = rstart + i;
3701     nz  = ii[i+1] - ii[i];
3702     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3703     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3704     jj += nz; aa += nz;
3705   }
3706   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3707   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3708 
3709   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3710   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711 
3712   ierr = PetscFree(colsub);CHKERRQ(ierr);
3713 
3714   /* save Msub, iscol_sub and iscmap used in processor for next request */
3715   if (call == MAT_INITIAL_MATRIX) {
3716     *newmat = M;
3717     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3718     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3719 
3720     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3721     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3722 
3723     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3724     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3725 
3726     if (iscol_local) {
3727       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3728       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3729     }
3730   }
3731   PetscFunctionReturn(0);
3732 }
3733 
3734 /*
3735     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ on each
3736   process and then the final result by concatenating the local matrices.
3737   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3738 
3739   Note: This requires a sequential iscol with all indices.
3740 */
3741 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3742 {
3743   PetscErrorCode ierr;
3744   PetscMPIInt    rank,size;
3745   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3746   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3747   Mat            M,Mreuse;
3748   MatScalar      *aa,*vwork;
3749   MPI_Comm       comm;
3750   Mat_SeqAIJ     *aij;
3751   PetscBool      colflag,allcolumns=PETSC_FALSE;
3752 
3753   PetscFunctionBegin;
3754   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3755   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3756   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3757 
3758   /* Check for special case: each processor gets entire matrix columns */
3759   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3760   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3761   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3762   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3763 
3764   if (call ==  MAT_REUSE_MATRIX) {
3765     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3766     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3767     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3768   } else {
3769     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3770   }
3771 
3772   /*
3773       m - number of local rows
3774       n - number of columns (same on all processors)
3775       rstart - first row in new global matrix generated
3776   */
3777   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3778   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3779   if (call == MAT_INITIAL_MATRIX) {
3780     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3781     ii  = aij->i;
3782     jj  = aij->j;
3783 
3784     /*
3785         Determine the number of non-zeros in the diagonal and off-diagonal
3786         portions of the matrix in order to do correct preallocation
3787     */
3788 
3789     /* first get start and end of "diagonal" columns */
3790     if (csize == PETSC_DECIDE) {
3791       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3792       if (mglobal == n) { /* square matrix */
3793         nlocal = m;
3794       } else {
3795         nlocal = n/size + ((n % size) > rank);
3796       }
3797     } else {
3798       nlocal = csize;
3799     }
3800     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3801     rstart = rend - nlocal;
3802     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3803 
3804     /* next, compute all the lengths */
3805     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3806     olens = dlens + m;
3807     for (i=0; i<m; i++) {
3808       jend = ii[i+1] - ii[i];
3809       olen = 0;
3810       dlen = 0;
3811       for (j=0; j<jend; j++) {
3812         if (*jj < rstart || *jj >= rend) olen++;
3813         else dlen++;
3814         jj++;
3815       }
3816       olens[i] = olen;
3817       dlens[i] = dlen;
3818     }
3819     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3820     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3821     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3822     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3823     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3824     ierr = PetscFree(dlens);CHKERRQ(ierr);
3825   } else {
3826     PetscInt ml,nl;
3827 
3828     M    = *newmat;
3829     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3830     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3831     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3832     /*
3833          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3834        rather than the slower MatSetValues().
3835     */
3836     M->was_assembled = PETSC_TRUE;
3837     M->assembled     = PETSC_FALSE;
3838   }
3839   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3840   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3841   ii   = aij->i;
3842   jj   = aij->j;
3843 
3844   /* trigger copy to CPU if needed */
3845   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3846   for (i=0; i<m; i++) {
3847     row   = rstart + i;
3848     nz    = ii[i+1] - ii[i];
3849     cwork = jj; jj += nz;
3850     vwork = aa; aa += nz;
3851     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3852   }
3853   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3854 
3855   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3856   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3857   *newmat = M;
3858 
3859   /* save submatrix used in processor for next request */
3860   if (call ==  MAT_INITIAL_MATRIX) {
3861     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3862     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3863   }
3864   PetscFunctionReturn(0);
3865 }
3866 
3867 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3868 {
3869   PetscInt       m,cstart, cend,j,nnz,i,d;
3870   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3871   const PetscInt *JJ;
3872   PetscErrorCode ierr;
3873   PetscBool      nooffprocentries;
3874 
3875   PetscFunctionBegin;
3876   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3877 
3878   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3879   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3880   m      = B->rmap->n;
3881   cstart = B->cmap->rstart;
3882   cend   = B->cmap->rend;
3883   rstart = B->rmap->rstart;
3884 
3885   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3886 
3887   if (PetscDefined(USE_DEBUG)) {
3888     for (i=0; i<m; i++) {
3889       nnz = Ii[i+1]- Ii[i];
3890       JJ  = J + Ii[i];
3891       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3892       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3893       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3894     }
3895   }
3896 
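  /* Count, for each local row, how many column indices fall in the diagonal block [cstart,cend); the remainder belong to the off-diagonal block */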
3897   for (i=0; i<m; i++) {
3898     nnz     = Ii[i+1]- Ii[i];
3899     JJ      = J + Ii[i];
3900     nnz_max = PetscMax(nnz_max,nnz);
3901     d       = 0;
3902     for (j=0; j<nnz; j++) {
3903       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3904     }
3905     d_nnz[i] = d;
3906     o_nnz[i] = nnz - d;
3907   }
3908   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3909   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3910 
3911   for (i=0; i<m; i++) {
3912     ii   = i + rstart;
3913     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3914   }
3915   nooffprocentries    = B->nooffprocentries;
3916   B->nooffprocentries = PETSC_TRUE;
3917   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3918   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   B->nooffprocentries = nooffprocentries;
3920 
3921   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 /*@
3926    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3927    (the default parallel PETSc format).
3928 
3929    Collective
3930 
3931    Input Parameters:
3932 +  B - the matrix
3933 .  i - the indices into j for the start of each local row (starts with zero)
3934 .  j - the column indices for each local row (starts with zero)
3935 -  v - optional values in the matrix
3936 
3937    Level: developer
3938 
3939    Notes:
3940        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3941      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3942      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3943 
3944        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3945 
3946        The format used for the sparse matrix input is equivalent to a
3947     row-major ordering, i.e. for the following matrix the input data expected is
3948     as shown:
3949 
3950 $        1 0 0
3951 $        2 0 3     P0
3952 $       -------
3953 $        4 5 6     P1
3954 $
3955 $     Process0 [P0]: rows_owned=[0,1]
3956 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3957 $        j =  {0,0,2}  [size = 3]
3958 $        v =  {1,2,3}  [size = 3]
3959 $
3960 $     Process1 [P1]: rows_owned=[2]
3961 $        i =  {0,3}    [size = nrow+1  = 1+1]
3962 $        j =  {0,1,2}  [size = 3]
3963 $        v =  {4,5,6}  [size = 3]
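
   As a minimal sketch (not a complete example; B is an illustrative name), process P0 above could create its part of this matrix with

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve

   while P1 makes the analogous calls with its own i, j, and v arrays; the preallocation call also inserts the values and assembles the matrix.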
3964 
3965 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3966           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3967 @*/
3968 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3969 {
3970   PetscErrorCode ierr;
3971 
3972   PetscFunctionBegin;
3973   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 /*@C
3978    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3979    (the default parallel PETSc format).  For good matrix assembly performance
3980    the user should preallocate the matrix storage by setting the parameters
3981    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3982    performance can be increased by more than a factor of 50.
3983 
3984    Collective
3985 
3986    Input Parameters:
3987 +  B - the matrix
3988 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3989            (same value is used for all local rows)
3990 .  d_nnz - array containing the number of nonzeros in the various rows of the
3991            DIAGONAL portion of the local submatrix (possibly different for each row)
3992            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3993            The size of this array is equal to the number of local rows, i.e 'm'.
3994            For matrices that will be factored, you must leave room for (and set)
3995            the diagonal entry even if it is zero.
3996 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3997            submatrix (same value is used for all local rows).
3998 -  o_nnz - array containing the number of nonzeros in the various rows of the
3999            OFF-DIAGONAL portion of the local submatrix (possibly different for
4000            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4001            structure. The size of this array is equal to the number
4002            of local rows, i.e 'm'.
4003 
4004    If the *_nnz parameter is given then the *_nz parameter is ignored
4005 
4006    The AIJ format (also called the Yale sparse matrix format or
4007    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4008    storage.  The stored row and column indices begin with zero.
4009    See Users-Manual: ch_mat for details.
4010 
4011    The parallel matrix is partitioned such that the first m0 rows belong to
4012    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4013    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4014 
4015    The DIAGONAL portion of the local submatrix of a processor can be defined
4016    as the submatrix which is obtained by extracting the part corresponding to
4017    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4018    first row that belongs to the processor, r2 is the last row belonging to
4019    this processor, and c1-c2 is the range of indices of the local part of a
4020    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4021    common case of a square matrix, the row and column ranges are the same and
4022    the DIAGONAL part is also square. The remaining portion of the local
4023    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4024 
4025    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4026 
4027    You can call MatGetInfo() to get information on how effective the preallocation was;
4028    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4029    You can also run with the option -info and look for messages with the string
4030    malloc in them to see if additional memory allocation was needed.
4031 
4032    Example usage:
4033 
4034    Consider the following 8x8 matrix with 34 non-zero values, that is
4035    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4036    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4037    as follows:
4038 
4039 .vb
4040             1  2  0  |  0  3  0  |  0  4
4041     Proc0   0  5  6  |  7  0  0  |  8  0
4042             9  0 10  | 11  0  0  | 12  0
4043     -------------------------------------
4044            13  0 14  | 15 16 17  |  0  0
4045     Proc1   0 18  0  | 19 20 21  |  0  0
4046             0  0  0  | 22 23  0  | 24  0
4047     -------------------------------------
4048     Proc2  25 26 27  |  0  0 28  | 29  0
4049            30  0  0  | 31 32 33  |  0 34
4050 .ve
4051 
4052    This can be represented as a collection of submatrices as:
4053 
4054 .vb
4055       A B C
4056       D E F
4057       G H I
4058 .ve
4059 
4060    Where the submatrices A,B,C are owned by proc0, D,E,F are
4061    owned by proc1, G,H,I are owned by proc2.
4062 
4063    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4064    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065    The 'M','N' parameters are 8,8, and have the same values on all procs.
4066 
4067    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4068    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4069    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4070    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4071    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4072    matrix, and [DF] as another SeqAIJ matrix.
4073 
4074    When d_nz, o_nz parameters are specified, d_nz storage elements are
4075    allocated for every row of the local diagonal submatrix, and o_nz
4076    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4077    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4078    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4079    In this case, the values of d_nz,o_nz are:
4080 .vb
4081      proc0 : dnz = 2, o_nz = 2
4082      proc1 : dnz = 3, o_nz = 2
4083      proc2 : dnz = 1, o_nz = 4
4084 .ve
4085    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4086    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4087    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4088    34 values.
4089 
4090    When d_nnz, o_nnz parameters are specified, the storage is specified
4091    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4092    In the above case the values for d_nnz,o_nnz are:
4093 .vb
4094      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4095      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4096      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4097 .ve
4098    Here the space allocated is the sum of all the above values, i.e. 34, and
4099    hence the preallocation is exact.
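
   As an illustrative sketch, the calls made on proc0 for the example above would then be (only proc0's values are shown; A and the array names are hypothetical):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve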
4100 
4101    Level: intermediate
4102 
4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4104           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4105 @*/
4106 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4107 {
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4112   PetscValidType(B,1);
4113   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4114   PetscFunctionReturn(0);
4115 }
4116 
4117 /*@
4118      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4119          CSR format.
4120 
4121    Collective
4122 
4123    Input Parameters:
4124 +  comm - MPI communicator
4125 .  m - number of local rows (Cannot be PETSC_DECIDE)
4126 .  n - This value should be the same as the local size used in creating the
4127        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4128        calculated if N is given) For square matrices n is almost always m.
4129 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4130 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4131 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4132 .   j - column indices
4133 -   a - matrix values
4134 
4135    Output Parameter:
4136 .   mat - the matrix
4137 
4138    Level: intermediate
4139 
4140    Notes:
4141        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4142      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4143      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4144 
4145        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4146 
4147        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4148 
4149        The format used for the sparse matrix input is equivalent to a
4150     row-major ordering, i.e. for the following matrix the input data expected is
4151     as shown:
4152 
4153 $        1 0 0
4154 $        2 0 3     P0
4155 $       -------
4156 $        4 5 6     P1
4157 $
4158 $     Process0 [P0]: rows_owned=[0,1]
4159 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4160 $        j =  {0,0,2}  [size = 3]
4161 $        v =  {1,2,3}  [size = 3]
4162 $
4163 $     Process1 [P1]: rows_owned=[2]
4164 $        i =  {0,3}    [size = nrow+1  = 1+1]
4165 $        j =  {0,1,2}  [size = 3]
4166 $        v =  {4,5,6}  [size = 3]
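
   As a sketch (illustrative names), process P0 above could create the matrix with the call below; P1 passes m=1 and its own arrays:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve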
4167 
4168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4169           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4170 @*/
4171 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4172 {
4173   PetscErrorCode ierr;
4174 
4175   PetscFunctionBegin;
4176   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4177   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4178   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4179   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4180   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4181   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4182   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4183   PetscFunctionReturn(0);
4184 }
4185 
4186 /*@
4187      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4188          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4189 
4190    Collective
4191 
4192    Input Parameters:
4193 +  mat - the matrix
4194 .  m - number of local rows (Cannot be PETSC_DECIDE)
4195 .  n - This value should be the same as the local size used in creating the
4196        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4197        calculated if N is given) For square matrices n is almost always m.
4198 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4199 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4200 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4201 .  J - column indices
4202 -  v - matrix values
4203 
4204    Level: intermediate
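
   Notes:
     As a sketch (illustrative names), a matrix previously created with MatCreateMPIAIJWithArrays() can be refilled with new numerical
     values that have the same sparsity pattern; m and n must match the local sizes of the matrix, so they are queried here:

.vb
     PetscInt    ml,nl;
     PetscScalar vnew[] = {10.0,20.0,30.0};

     MatGetLocalSize(A,&ml,&nl);
     MatUpdateMPIAIJWithArrays(A,ml,nl,3,3,i,j,vnew);
.ve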
4205 
4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4207           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4208 @*/
4209 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4210 {
4211   PetscErrorCode ierr;
4212   PetscInt       cstart,nnz,i,j;
4213   PetscInt       *ld;
4214   PetscBool      nooffprocentries;
4215   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4216   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4217   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4218   const PetscInt *Adi = Ad->i;
4219   PetscInt       ldi,Iii,md;
4220 
4221   PetscFunctionBegin;
4222   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4223   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4224   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4225   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4226 
4227   cstart = mat->cmap->rstart;
4228   if (!Aij->ld) {
4229     /* count number of entries below block diagonal */
4230     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4231     Aij->ld = ld;
4232     for (i=0; i<m; i++) {
4233       nnz  = Ii[i+1]- Ii[i];
4234       j     = 0;
4235       while (j < nnz && J[j] < cstart) {j++;}
4236       J    += nnz;
4237       ld[i] = j;
4238     }
4239   } else {
4240     ld = Aij->ld;
4241   }
4242 
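  /* Copy the CSR values row by row: the first ld[i] entries (global columns before the diagonal block) go into the
     off-diagonal part ao, the next md entries go into the diagonal part ad, and the remaining entries go back into ao */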
4243   for (i=0; i<m; i++) {
4244     nnz  = Ii[i+1]- Ii[i];
4245     Iii  = Ii[i];
4246     ldi  = ld[i];
4247     md   = Adi[i+1]-Adi[i];
4248     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4249     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4250     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4251     ad  += md;
4252     ao  += nnz - md;
4253   }
4254   nooffprocentries      = mat->nooffprocentries;
4255   mat->nooffprocentries = PETSC_TRUE;
4256   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4257   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4258   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4259   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4260   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4261   mat->nooffprocentries = nooffprocentries;
4262   PetscFunctionReturn(0);
4263 }
4264 
4265 /*@C
4266    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4267    (the default parallel PETSc format).  For good matrix assembly performance
4268    the user should preallocate the matrix storage by setting the parameters
4269    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4270    performance can be increased by more than a factor of 50.
4271 
4272    Collective
4273 
4274    Input Parameters:
4275 +  comm - MPI communicator
4276 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4277            This value should be the same as the local size used in creating the
4278            y vector for the matrix-vector product y = Ax.
4279 .  n - This value should be the same as the local size used in creating the
4280        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4281        calculated if N is given) For square matrices n is almost always m.
4282 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4283 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4284 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4285            (same value is used for all local rows)
4286 .  d_nnz - array containing the number of nonzeros in the various rows of the
4287            DIAGONAL portion of the local submatrix (possibly different for each row)
4288            or NULL, if d_nz is used to specify the nonzero structure.
4289            The size of this array is equal to the number of local rows, i.e 'm'.
4290 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4291            submatrix (same value is used for all local rows).
4292 -  o_nnz - array containing the number of nonzeros in the various rows of the
4293            OFF-DIAGONAL portion of the local submatrix (possibly different for
4294            each row) or NULL, if o_nz is used to specify the nonzero
4295            structure. The size of this array is equal to the number
4296            of local rows, i.e 'm'.
4297 
4298    Output Parameter:
4299 .  A - the matrix
4300 
4301    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4302    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4303    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4304 
4305    Notes:
4306    If the *_nnz parameter is given then the *_nz parameter is ignored
4307 
4308    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4309    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4310    storage requirements for this matrix.
4311 
4312    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4313    processor then it must be used on all processors that share the object for
4314    that argument.
4315 
4316    The user MUST specify either the local or global matrix dimensions
4317    (possibly both).
4318 
4319    The parallel matrix is partitioned across processors such that the
4320    first m0 rows belong to process 0, the next m1 rows belong to
4321    process 1, the next m2 rows belong to process 2 etc.. where
4322    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4323    values corresponding to an [m x N] submatrix.
4324 
4325    The columns are logically partitioned with the n0 columns belonging
4326    to 0th partition, the next n1 columns belonging to the next
4327    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4328 
4329    The DIAGONAL portion of the local submatrix on any given processor
4330    is the submatrix corresponding to the rows and columns m,n
4331    corresponding to the given processor, i.e. the diagonal matrix on
4332    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4333    etc. The remaining portion of the local submatrix [m x (N-n)]
4334    constitute the OFF-DIAGONAL portion. The example below better
4335    illustrates this concept.
4336 
4337    For a square global matrix we define each processor's diagonal portion
4338    to be its local rows and the corresponding columns (a square submatrix);
4339    each processor's off-diagonal portion encompasses the remainder of the
4340    local matrix (a rectangular submatrix).
4341 
4342    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4343 
4344    When calling this routine with a single process communicator, a matrix of
4345    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4346    type of communicator, use the construction mechanism
4347 .vb
4348      MatCreate(...,&A);
4349      MatSetType(A,MATMPIAIJ);
4350      MatSetSizes(A, m,n,M,N);
4351      MatMPIAIJSetPreallocation(A,...);
4352 .ve
4355 
4356    By default, this format uses inodes (identical nodes) when possible.
4357    We search for consecutive rows with the same nonzero structure, thereby
4358    reusing matrix information to achieve increased efficiency.
4359 
4360    Options Database Keys:
4361 +  -mat_no_inode  - Do not use inodes
4362 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4363 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4364         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4365         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4366 
4367    Example usage:
4368 
4369    Consider the following 8x8 matrix with 34 non-zero values, that is
4370    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4371    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4372    as follows
4373 
4374 .vb
4375             1  2  0  |  0  3  0  |  0  4
4376     Proc0   0  5  6  |  7  0  0  |  8  0
4377             9  0 10  | 11  0  0  | 12  0
4378     -------------------------------------
4379            13  0 14  | 15 16 17  |  0  0
4380     Proc1   0 18  0  | 19 20 21  |  0  0
4381             0  0  0  | 22 23  0  | 24  0
4382     -------------------------------------
4383     Proc2  25 26 27  |  0  0 28  | 29  0
4384            30  0  0  | 31 32 33  |  0 34
4385 .ve
4386 
4387    This can be represented as a collection of submatrices as
4388 
4389 .vb
4390       A B C
4391       D E F
4392       G H I
4393 .ve
4394 
4395    Where the submatrices A,B,C are owned by proc0, D,E,F are
4396    owned by proc1, G,H,I are owned by proc2.
4397 
4398    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4399    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4400    The 'M','N' parameters are 8,8, and have the same values on all procs.
4401 
4402    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4403    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4404    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4405    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4406    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4407    matrix, and [DF] as another SeqAIJ matrix.
4408 
4409    When d_nz, o_nz parameters are specified, d_nz storage elements are
4410    allocated for every row of the local diagonal submatrix, and o_nz
4411    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4412    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4413    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4414    In this case, the values of d_nz,o_nz are
4415 .vb
4416      proc0 : dnz = 2, o_nz = 2
4417      proc1 : dnz = 3, o_nz = 2
4418      proc2 : dnz = 1, o_nz = 4
4419 .ve
4420    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4421    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4422    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4423    34 values.
4424 
4425    When d_nnz, o_nnz parameters are specified, the storage is specified
4426    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4427    In the above case the values for d_nnz,o_nnz are
4428 .vb
4429      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4430      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4431      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4432 .ve
4433    Here the space allocated is the sum of all the above values, i.e. 34, and
4434    hence the preallocation is exact.
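
   A minimal sketch of the corresponding call on proc0 (illustrative names; the other processes pass their own local sizes and counts):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve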
4435 
4436    Level: intermediate
4437 
4438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4439           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4440 @*/
4441 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4442 {
4443   PetscErrorCode ierr;
4444   PetscMPIInt    size;
4445 
4446   PetscFunctionBegin;
4447   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4448   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4449   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
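  /* On a single-process communicator create a SEQAIJ matrix, otherwise create an MPIAIJ matrix */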
4450   if (size > 1) {
4451     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4452     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4453   } else {
4454     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4455     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4456   }
4457   PetscFunctionReturn(0);
4458 }
4459 
4460 /*@C
4461   MatMPIAIJGetSeqAIJ - Returns the local pieces (diagonal and off-diagonal blocks) of this distributed matrix
4462 
4463   Not collective
4464 
4465   Input Parameter:
4466 . A - The MPIAIJ matrix
4467 
4468   Output Parameters:
4469 + Ad - The local diagonal block as a SeqAIJ matrix
4470 . Ao - The local off-diagonal block as a SeqAIJ matrix
4471 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4472 
4473   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4474   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4475   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4476   local column numbers to global column numbers in the original matrix.
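
  As a sketch (illustrative only), the mapping provided by colmap can be used as follows: after the call below, a local column j of Ao
  corresponds to global column colmap[j] of A.

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve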
4477 
4478   Level: intermediate
4479 
4480 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4481 @*/
4482 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4483 {
4484   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4485   PetscBool      flg;
4486   PetscErrorCode ierr;
4487 
4488   PetscFunctionBegin;
4489   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4490   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4491   if (Ad)     *Ad     = a->A;
4492   if (Ao)     *Ao     = a->B;
4493   if (colmap) *colmap = a->garray;
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4498 {
4499   PetscErrorCode ierr;
4500   PetscInt       m,N,i,rstart,nnz,Ii;
4501   PetscInt       *indx;
4502   PetscScalar    *values;
4503   MatType        rootType;
4504 
4505   PetscFunctionBegin;
4506   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4507   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4508     PetscInt       *dnz,*onz,sum,bs,cbs;
4509 
4510     if (n == PETSC_DECIDE) {
4511       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4512     }
4513     /* Check sum(n) = N */
4514     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4515     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4516 
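    /* Compute this process's first global row in the concatenated matrix as the exclusive prefix sum of the local row counts m */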
4517     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4518     rstart -= m;
4519 
4520     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4521     for (i=0; i<m; i++) {
4522       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4523       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4524       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4525     }
4526 
4527     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4528     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4529     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4530     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4531     ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
4532     ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
4533     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4534     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4535     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4536     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4537   }
4538 
4539   /* numeric phase */
4540   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4541   for (i=0; i<m; i++) {
4542     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4543     Ii   = i + rstart;
4544     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4545     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4546   }
4547   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4549   PetscFunctionReturn(0);
4550 }
4551 
4552 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4553 {
4554   PetscErrorCode    ierr;
4555   PetscMPIInt       rank;
4556   PetscInt          m,N,i,rstart,nnz;
4557   size_t            len;
4558   const PetscInt    *indx;
4559   PetscViewer       out;
4560   char              *name;
4561   Mat               B;
4562   const PetscScalar *values;
4563 
4564   PetscFunctionBegin;
4565   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4566   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4567   /* Should this be the type of the diagonal block of A? */
4568   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4569   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4570   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4571   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4572   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4573   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4574   for (i=0; i<m; i++) {
4575     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4576     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4577     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4578   }
4579   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4580   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4581 
4582   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4583   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4584   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4585   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4586   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4587   ierr = PetscFree(name);CHKERRQ(ierr);
4588   ierr = MatView(B,out);CHKERRQ(ierr);
4589   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4590   ierr = MatDestroy(&B);CHKERRQ(ierr);
4591   PetscFunctionReturn(0);
4592 }
4593 
4594 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4595 {
4596   PetscErrorCode      ierr;
4597   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4598 
4599   PetscFunctionBegin;
4600   if (!merge) PetscFunctionReturn(0);
4601   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4602   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4603   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4604   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4605   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4606   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4607   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4608   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4609   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4610   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4611   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4612   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4613   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4614   ierr = PetscFree(merge);CHKERRQ(ierr);
4615   PetscFunctionReturn(0);
4616 }
4617 
4618 #include <../src/mat/utils/freespace.h>
4619 #include <petscbt.h>
4620 
4621 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4622 {
4623   PetscErrorCode      ierr;
4624   MPI_Comm            comm;
4625   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4626   PetscMPIInt         size,rank,taga,*len_s;
4627   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4628   PetscInt            proc,m;
4629   PetscInt            **buf_ri,**buf_rj;
4630   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4631   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4632   MPI_Request         *s_waits,*r_waits;
4633   MPI_Status          *status;
4634   MatScalar           *aa=a->a;
4635   MatScalar           **abuf_r,*ba_i;
4636   Mat_Merge_SeqsToMPI *merge;
4637   PetscContainer      container;
4638 
4639   PetscFunctionBegin;
4640   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4641   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4642 
4643   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4644   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4645 
4646   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4647   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4648   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4649 
4650   bi     = merge->bi;
4651   bj     = merge->bj;
4652   buf_ri = merge->buf_ri;
4653   buf_rj = merge->buf_rj;
4654 
4655   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4656   owners = merge->rowmap->range;
4657   len_s  = merge->len_s;
4658 
4659   /* send and recv matrix values */
4660   /*-----------------------------*/
4661   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4662   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4663 
4664   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4665   for (proc=0,k=0; proc<size; proc++) {
4666     if (!len_s[proc]) continue;
4667     i    = owners[proc];
4668     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4669     k++;
4670   }
4671 
4672   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4673   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4674   ierr = PetscFree(status);CHKERRQ(ierr);
4675 
4676   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4677   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4678 
4679   /* insert mat values of mpimat */
4680   /*----------------------------*/
4681   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4682   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4683 
4684   for (k=0; k<merge->nrecv; k++) {
4685     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4686     nrows       = *(buf_ri_k[k]);
4687     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4688     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4689   }
4690 
4691   /* set values of ba */
4692   m = merge->rowmap->n;
4693   for (i=0; i<m; i++) {
4694     arow = owners[rank] + i;
4695     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4696     bnzi = bi[i+1] - bi[i];
4697     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4698 
4699     /* add local non-zero vals of this proc's seqmat into ba */
4700     anzi   = ai[arow+1] - ai[arow];
4701     aj     = a->j + ai[arow];
4702     aa     = a->a + ai[arow];
4703     nextaj = 0;
4704     for (j=0; nextaj<anzi; j++) {
4705       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4706         ba_i[j] += aa[nextaj++];
4707       }
4708     }
4709 
4710     /* add received vals into ba */
4711     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4712       /* i-th row */
4713       if (i == *nextrow[k]) {
4714         anzi   = *(nextai[k]+1) - *nextai[k];
4715         aj     = buf_rj[k] + *(nextai[k]);
4716         aa     = abuf_r[k] + *(nextai[k]);
4717         nextaj = 0;
4718         for (j=0; nextaj<anzi; j++) {
4719           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4720             ba_i[j] += aa[nextaj++];
4721           }
4722         }
4723         nextrow[k]++; nextai[k]++;
4724       }
4725     }
4726     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4727   }
4728   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4729   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4730 
4731   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4732   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4733   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4734   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4735   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4736   PetscFunctionReturn(0);
4737 }
4738 
4739 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4740 {
4741   PetscErrorCode      ierr;
4742   Mat                 B_mpi;
4743   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4744   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4745   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4746   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4747   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4748   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4749   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4750   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4751   MPI_Status          *status;
4752   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4753   PetscBT             lnkbt;
4754   Mat_Merge_SeqsToMPI *merge;
4755   PetscContainer      container;
4756 
4757   PetscFunctionBegin;
4758   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4759 
4760   /* make sure it is a PETSc comm */
4761   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4762   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4763   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4764 
4765   ierr = PetscNew(&merge);CHKERRQ(ierr);
4766   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4767 
4768   /* determine row ownership */
4769   /*---------------------------------------------------------*/
4770   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4771   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4772   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4773   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4774   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4775   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4776   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4777 
4778   m      = merge->rowmap->n;
4779   owners = merge->rowmap->range;
4780 
4781   /* determine the number of messages to send, their lengths */
4782   /*---------------------------------------------------------*/
4783   len_s = merge->len_s;
4784 
4785   len          = 0; /* length of buf_si[] */
4786   merge->nsend = 0;
4787   for (proc=0; proc<size; proc++) {
4788     len_si[proc] = 0;
4789     if (proc == rank) {
4790       len_s[proc] = 0;
4791     } else {
4792       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4793       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4794     }
4795     if (len_s[proc]) {
4796       merge->nsend++;
4797       nrows = 0;
4798       for (i=owners[proc]; i<owners[proc+1]; i++) {
4799         if (ai[i+1] > ai[i]) nrows++;
4800       }
4801       len_si[proc] = 2*(nrows+1);
4802       len         += len_si[proc];
4803     }
4804   }
4805 
4806   /* determine the number and length of messages to receive for ij-structure */
4807   /*-------------------------------------------------------------------------*/
4808   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4809   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4810 
4811   /* post the Irecv of j-structure */
4812   /*-------------------------------*/
4813   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4814   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4815 
4816   /* post the Isend of j-structure */
4817   /*--------------------------------*/
4818   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4819 
4820   for (proc=0, k=0; proc<size; proc++) {
4821     if (!len_s[proc]) continue;
4822     i    = owners[proc];
4823     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4824     k++;
4825   }
4826 
4827   /* receives and sends of j-structure are complete */
4828   /*------------------------------------------------*/
4829   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4830   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4831 
4832   /* send and recv i-structure */
4833   /*---------------------------*/
4834   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4835   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4836 
4837   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4838   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4839   for (proc=0,k=0; proc<size; proc++) {
4840     if (!len_s[proc]) continue;
4841     /* form outgoing message for i-structure:
4842          buf_si[0]:                 nrows to be sent
4843                [1:nrows]:           row index (global)
4844                [nrows+1:2*nrows+1]: i-structure index
4845     */
4846     /*-------------------------------------------*/
4847     nrows       = len_si[proc]/2 - 1;
4848     buf_si_i    = buf_si + nrows+1;
4849     buf_si[0]   = nrows;
4850     buf_si_i[0] = 0;
4851     nrows       = 0;
4852     for (i=owners[proc]; i<owners[proc+1]; i++) {
4853       anzi = ai[i+1] - ai[i];
4854       if (anzi) {
4855         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4856         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4857         nrows++;
4858       }
4859     }
4860     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4861     k++;
4862     buf_si += len_si[proc];
4863   }
4864 
4865   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4866   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4867 
4868   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4869   for (i=0; i<merge->nrecv; i++) {
4870     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4871   }
4872 
4873   ierr = PetscFree(len_si);CHKERRQ(ierr);
4874   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4875   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4876   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4877   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4878   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4879   ierr = PetscFree(status);CHKERRQ(ierr);
4880 
4881   /* compute a local seq matrix in each processor */
4882   /*----------------------------------------------*/
4883   /* allocate bi array and free space for accumulating nonzero column info */
4884   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4885   bi[0] = 0;
4886 
4887   /* create and initialize a linked list */
4888   nlnk = N+1;
4889   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4890 
4891   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4892   len  = ai[owners[rank+1]] - ai[owners[rank]];
4893   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4894 
4895   current_space = free_space;
4896 
4897   /* determine symbolic info for each local row */
4898   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4899 
4900   for (k=0; k<merge->nrecv; k++) {
4901     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4902     nrows       = *buf_ri_k[k];
4903     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4904     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4905   }
4906 
4907   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4908   len  = 0;
4909   for (i=0; i<m; i++) {
4910     bnzi = 0;
4911     /* add local non-zero cols of this proc's seqmat into lnk */
4912     arow  = owners[rank] + i;
4913     anzi  = ai[arow+1] - ai[arow];
4914     aj    = a->j + ai[arow];
4915     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4916     bnzi += nlnk;
4917     /* add received col data into lnk */
4918     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4919       if (i == *nextrow[k]) { /* i-th row */
4920         anzi  = *(nextai[k]+1) - *nextai[k];
4921         aj    = buf_rj[k] + *nextai[k];
4922         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4923         bnzi += nlnk;
4924         nextrow[k]++; nextai[k]++;
4925       }
4926     }
4927     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4928 
4929     /* if free space is not available, make more free space */
4930     if (current_space->local_remaining<bnzi) {
4931       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4932       nspacedouble++;
4933     }
4934     /* copy data into free space, then initialize lnk */
4935     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4936     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4937 
4938     current_space->array           += bnzi;
4939     current_space->local_used      += bnzi;
4940     current_space->local_remaining -= bnzi;
4941 
4942     bi[i+1] = bi[i] + bnzi;
4943   }
4944 
4945   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4946 
4947   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4948   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4949   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4950 
4951   /* create symbolic parallel matrix B_mpi */
4952   /*---------------------------------------*/
4953   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4954   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4955   if (n==PETSC_DECIDE) {
4956     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4957   } else {
4958     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4959   }
4960   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4961   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4962   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4963   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4964   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4965 
4966   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4967   B_mpi->assembled  = PETSC_FALSE;
4968   merge->bi         = bi;
4969   merge->bj         = bj;
4970   merge->buf_ri     = buf_ri;
4971   merge->buf_rj     = buf_rj;
4972   merge->coi        = NULL;
4973   merge->coj        = NULL;
4974   merge->owners_co  = NULL;
4975 
4976   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4977 
4978   /* attach the supporting struct to B_mpi for reuse */
4979   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4980   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4981   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4982   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4983   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4984   *mpimat = B_mpi;
4985 
4986   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4987   PetscFunctionReturn(0);
4988 }
4989 
4990 /*@C
4991       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4992                  matrices from each processor
4993 
4994     Collective
4995 
4996    Input Parameters:
4997 +    comm - the communicator the parallel matrix will live on
4998 .    seqmat - the input sequential matrix
4999 .    m - number of local rows (or PETSC_DECIDE)
5000 .    n - number of local columns (or PETSC_DECIDE)
5001 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5002 
5003    Output Parameter:
5004 .    mpimat - the parallel matrix generated
5005 
5006     Level: advanced
5007 
5008    Notes:
5009      The dimensions of the sequential matrix on each process MUST be the same.
5010      The input seqmat is included in the container "Mat_Merge_SeqsToMPI" and will be
5011      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
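
     A minimal usage sketch (each process is assumed to hold an assembled SeqAIJ matrix Aseq of identical dimensions; names are illustrative):
.vb
     Mat Apar;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
     /* ... change the numerical values of Aseq, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Apar);CHKERRQ(ierr);
.ve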
5012 @*/
5013 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5014 {
5015   PetscErrorCode ierr;
5016   PetscMPIInt    size;
5017 
5018   PetscFunctionBegin;
5019   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5020   if (size == 1) {
5021     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5022     if (scall == MAT_INITIAL_MATRIX) {
5023       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5024     } else {
5025       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5026     }
5027     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5028     PetscFunctionReturn(0);
5029   }
5030   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5031   if (scall == MAT_INITIAL_MATRIX) {
5032     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5033   }
5034   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5035   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 
5039 /*@
5040      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5041           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
5042           with MatGetSize()
5043 
5044     Not Collective
5045 
5046    Input Parameters:
5047 +    A - the matrix
5048 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5049 
5050    Output Parameter:
5051 .    A_loc - the local sequential matrix generated
5052 
5053     Level: developer
5054 
5055    Notes:
5056      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5057      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5058      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5059      modify the values of the returned A_loc.
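
     A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; names are illustrative):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... after the values of A change, refresh Aloc in place ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve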
5060 
5061 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5062 @*/
5063 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5064 {
5065   PetscErrorCode    ierr;
5066   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5067   Mat_SeqAIJ        *mat,*a,*b;
5068   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5069   const PetscScalar *aa,*ba,*aav,*bav;
5070   PetscScalar       *ca,*cam;
5071   PetscMPIInt       size;
5072   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5073   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5074   PetscBool         match;
5075 
5076   PetscFunctionBegin;
5077   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5078   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5079   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5080   if (size == 1) {
5081     if (scall == MAT_INITIAL_MATRIX) {
5082       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5083       *A_loc = mpimat->A;
5084     } else if (scall == MAT_REUSE_MATRIX) {
5085       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5086     }
5087     PetscFunctionReturn(0);
5088   }
5089 
5090   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5091   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5092   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5093   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5094   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5095   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5096   aa   = aav;
5097   ba   = bav;
5098   if (scall == MAT_INITIAL_MATRIX) {
5099     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5100     ci[0] = 0;
5101     for (i=0; i<am; i++) {
5102       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5103     }
5104     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5105     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5106     k    = 0;
5107     for (i=0; i<am; i++) {
5108       ncols_o = bi[i+1] - bi[i];
5109       ncols_d = ai[i+1] - ai[i];
5110       /* off-diagonal portion of A */
5111       for (jo=0; jo<ncols_o; jo++) {
5112         col = cmap[*bj];
5113         if (col >= cstart) break;
5114         cj[k]   = col; bj++;
5115         ca[k++] = *ba++;
5116       }
5117       /* diagonal portion of A */
5118       for (j=0; j<ncols_d; j++) {
5119         cj[k]   = cstart + *aj++;
5120         ca[k++] = *aa++;
5121       }
5122       /* off-diagonal portion of A */
5123       for (j=jo; j<ncols_o; j++) {
5124         cj[k]   = cmap[*bj++];
5125         ca[k++] = *ba++;
5126       }
5127     }
5128     /* put together the new matrix */
5129     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5130     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5131     /* Since these are PETSc arrays, change flags to free them as necessary. */
5132     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5133     mat->free_a  = PETSC_TRUE;
5134     mat->free_ij = PETSC_TRUE;
5135     mat->nonew   = 0;
5136   } else if (scall == MAT_REUSE_MATRIX) {
5137     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5138 #if defined(PETSC_HAVE_DEVICE)
5139     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5140 #endif
5141     ci = mat->i; cj = mat->j; cam = mat->a;
5142     for (i=0; i<am; i++) {
5143       /* off-diagonal portion of A */
5144       ncols_o = bi[i+1] - bi[i];
5145       for (jo=0; jo<ncols_o; jo++) {
5146         col = cmap[*bj];
5147         if (col >= cstart) break;
5148         *cam++ = *ba++; bj++;
5149       }
5150       /* diagonal portion of A */
5151       ncols_d = ai[i+1] - ai[i];
5152       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5153       /* off-diagonal portion of A */
5154       for (j=jo; j<ncols_o; j++) {
5155         *cam++ = *ba++; bj++;
5156       }
5157     }
5158   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5159   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5160   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5161   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5162   PetscFunctionReturn(0);
5163 }
5164 
5165 /*@
5166      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5167           mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5168 
5169     Not Collective
5170 
5171    Input Parameters:
5172 +    A - the matrix
5173 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5174 
5175    Output Parameters:
5176 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5177 -    A_loc - the local sequential matrix generated
5178 
5179     Level: developer
5180 
5181    Notes:
5182     This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
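
     A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; names are illustrative):
.vb
     IS  glob;
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc);CHKERRQ(ierr);
     /* the i-th entry of glob is the global column index of column i of Aloc */
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve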
5183 
5184 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5185 
5186 @*/
5187 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5188 {
5189   PetscErrorCode ierr;
5190   Mat            Ao,Ad;
5191   const PetscInt *cmap;
5192   PetscMPIInt    size;
5193   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5194 
5195   PetscFunctionBegin;
5196   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5197   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5198   if (size == 1) {
5199     if (scall == MAT_INITIAL_MATRIX) {
5200       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5201       *A_loc = Ad;
5202     } else if (scall == MAT_REUSE_MATRIX) {
5203       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5204     }
5205     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5206     PetscFunctionReturn(0);
5207   }
5208   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5209   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5210   if (f) {
5211     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5212   } else {
5213     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5214     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5215     Mat_SeqAIJ        *c;
5216     PetscInt          *ai = a->i, *aj = a->j;
5217     PetscInt          *bi = b->i, *bj = b->j;
5218     PetscInt          *ci,*cj;
5219     const PetscScalar *aa,*ba;
5220     PetscScalar       *ca;
5221     PetscInt          i,j,am,dn,on;
5222 
5223     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5224     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5225     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5226     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5227     if (scall == MAT_INITIAL_MATRIX) {
5228       PetscInt k;
5229       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5230       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5231       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5232       ci[0] = 0;
5233       for (i=0,k=0; i<am; i++) {
5234         const PetscInt ncols_o = bi[i+1] - bi[i];
5235         const PetscInt ncols_d = ai[i+1] - ai[i];
5236         ci[i+1] = ci[i] + ncols_o + ncols_d;
5237         /* diagonal portion of A */
5238         for (j=0; j<ncols_d; j++,k++) {
5239           cj[k] = *aj++;
5240           ca[k] = *aa++;
5241         }
5242         /* off-diagonal portion of A */
5243         for (j=0; j<ncols_o; j++,k++) {
5244           cj[k] = dn + *bj++;
5245           ca[k] = *ba++;
5246         }
5247       }
5248       /* put together the new matrix */
5249       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5250       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5251       /* Since these are PETSc arrays, change flags to free them as necessary. */
5252       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5253       c->free_a  = PETSC_TRUE;
5254       c->free_ij = PETSC_TRUE;
5255       c->nonew   = 0;
5256       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5257     } else if (scall == MAT_REUSE_MATRIX) {
5258 #if defined(PETSC_HAVE_DEVICE)
5259       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5260 #endif
5261       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5262       ca = c->a;
5263       for (i=0; i<am; i++) {
5264         const PetscInt ncols_d = ai[i+1] - ai[i];
5265         const PetscInt ncols_o = bi[i+1] - bi[i];
5266         /* diagonal portion of A */
5267         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5268         /* off-diagonal portion of A */
5269         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5270       }
5271     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5272     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5273     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5274     if (glob) {
5275       PetscInt cst, *gidx;
5276 
5277       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5278       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5279       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5280       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5281       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5282     }
5283   }
5284   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5285   PetscFunctionReturn(0);
5286 }
5287 
5288 /*@C
5289      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5290 
5291     Not Collective
5292 
5293    Input Parameters:
5294 +    A - the matrix
5295 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5296 -    row, col - index sets of rows and columns to extract (or NULL)
5297 
5298    Output Parameter:
5299 .    A_loc - the local sequential matrix generated
5300 
5301     Level: developer
5302 
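   A minimal usage sketch (A is assumed to be an assembled MATMPIAIJ matrix; passing NULL for row and col selects all local rows and the nonzero columns; names are illustrative):
.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
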
5303 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5304 
5305 @*/
5306 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5307 {
5308   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5309   PetscErrorCode ierr;
5310   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5311   IS             isrowa,iscola;
5312   Mat            *aloc;
5313   PetscBool      match;
5314 
5315   PetscFunctionBegin;
5316   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5317   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5318   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5319   if (!row) {
5320     start = A->rmap->rstart; end = A->rmap->rend;
5321     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5322   } else {
5323     isrowa = *row;
5324   }
5325   if (!col) {
5326     start = A->cmap->rstart;
5327     cmap  = a->garray;
5328     nzA   = a->A->cmap->n;
5329     nzB   = a->B->cmap->n;
5330     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5331     ncols = 0;
5332     for (i=0; i<nzB; i++) {
5333       if (cmap[i] < start) idx[ncols++] = cmap[i];
5334       else break;
5335     }
5336     imark = i;
5337     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5338     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5339     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5340   } else {
5341     iscola = *col;
5342   }
5343   if (scall != MAT_INITIAL_MATRIX) {
5344     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5345     aloc[0] = *A_loc;
5346   }
5347   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5348   if (!col) { /* attach global id of condensed columns */
5349     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5350   }
5351   *A_loc = aloc[0];
5352   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5353   if (!row) {
5354     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5355   }
5356   if (!col) {
5357     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5358   }
5359   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5360   PetscFunctionReturn(0);
5361 }
5362 
5363 /*
5364  * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
5365  * Rows may be local or remote. The routine is designed to be memory scalable, so nothing is sized
5366  * based on a global dimension.
5367  * */
5368 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5369 {
5370   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5371   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5372   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5373   PetscMPIInt              owner;
5374   PetscSFNode              *iremote,*oiremote;
5375   const PetscInt           *lrowindices;
5376   PetscErrorCode           ierr;
5377   PetscSF                  sf,osf;
5378   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5379   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5380   MPI_Comm                 comm;
5381   ISLocalToGlobalMapping   mapping;
5382 
5383   PetscFunctionBegin;
5384   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5385   /* plocalsize is the number of roots
5386    * nrows is the number of leaves
5387    * */
5388   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5389   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5390   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5391   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5392   for (i=0;i<nrows;i++) {
5393     /* Find a remote index and an owner for a row
5394      * The row could be local or remote
5395      * */
5396     owner = 0;
5397     lidx  = 0;
5398     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5399     iremote[i].index = lidx;
5400     iremote[i].rank  = owner;
5401   }
5402   /* Create SF to communicate how many nonzero columns for each row */
5403   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5404   /* SF will figure out the number of nonzero columns for each row, and their
5405    * offsets
5406    * */
5407   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5408   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5409   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5410 
5411   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5412   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5413   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5414   roffsets[0] = 0;
5415   roffsets[1] = 0;
5416   for (i=0;i<plocalsize;i++) {
5417     /* diag */
5418     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5419     /* off diag */
5420     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5421     /* compute offsets so that we know the relative location of each row */
5422     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5423     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5424   }
5425   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5426   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5427   /* 'r' means root, and 'l' means leaf */
5428   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5429   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5430   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5431   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5432   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5433   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5434   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5435   dntotalcols = 0;
5436   ontotalcols = 0;
5437   ncol = 0;
5438   for (i=0;i<nrows;i++) {
5439     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5440     ncol = PetscMax(pnnz[i],ncol);
5441     /* diag */
5442     dntotalcols += nlcols[i*2+0];
5443     /* off diag */
5444     ontotalcols += nlcols[i*2+1];
5445   }
5446   /* We do not need to figure out the exact number of columns
5447    * since all the calculations will be done by going through the raw data
5448    * */
5449   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5450   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5451   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5452   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5453   /* diag */
5454   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5455   /* off diag */
5456   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5457   /* diag */
5458   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5459   /* off diag */
5460   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5461   dntotalcols = 0;
5462   ontotalcols = 0;
5463   ntotalcols  = 0;
5464   for (i=0;i<nrows;i++) {
5465     owner = 0;
5466     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5467     /* Set iremote for diag matrix */
5468     for (j=0;j<nlcols[i*2+0];j++) {
5469       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5470       iremote[dntotalcols].rank    = owner;
5471       /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
5472       ilocal[dntotalcols++]        = ntotalcols++;
5473     }
5474     /* off diag */
5475     for (j=0;j<nlcols[i*2+1];j++) {
5476       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5477       oiremote[ontotalcols].rank    = owner;
5478       oilocal[ontotalcols++]        = ntotalcols++;
5479     }
5480   }
5481   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5482   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5483   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5484   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5485   /* P serves as the roots and P_oth as the leaves
5486    * Diag matrix
5487    * */
5488   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5489   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5490   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5491 
5492   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5493   /* Off diag */
5494   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5495   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5496   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5497   /* We operate on the matrix's internal data to save memory */
5498   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5499   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5500   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5501   /* Convert to global indices for diag matrix */
5502   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5503   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5504   /* We want P_oth to store global indices */
5505   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5506   /* Use memory scalable approach */
5507   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5508   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5509   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5510   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5511   /* Convert back to local indices */
5512   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5513   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5514   nout = 0;
5515   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5516   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5517   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5518   /* Exchange values */
5519   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5520   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5521   /* Stop PETSc from shrinking memory */
5522   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5523   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5524   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5525   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5526   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5527   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5528   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5529   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5530   PetscFunctionReturn(0);
5531 }
5532 
5533 /*
5534  * Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A
5535  * This supports MPIAIJ and MAIJ
5536  * */
5537 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5538 {
5539   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5540   Mat_SeqAIJ            *p_oth;
5541   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5542   IS                    rows,map;
5543   PetscHMapI            hamp;
5544   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5545   MPI_Comm              comm;
5546   PetscSF               sf,osf;
5547   PetscBool             has;
5548   PetscErrorCode        ierr;
5549 
5550   PetscFunctionBegin;
5551   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5552   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5553   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5554    *  and then create a submatrix (that often is an overlapping matrix)
5555    * */
5556   if (reuse == MAT_INITIAL_MATRIX) {
5557     /* Use a hash table to figure out unique keys */
5558     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5559     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5560     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5561     count = 0;
5562     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5563     for (i=0;i<a->B->cmap->n;i++) {
5564       key  = a->garray[i]/dof;
5565       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5566       if (!has) {
5567         mapping[i] = count;
5568         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5569       } else {
5570         /* Current 'i' has the same key as the previous one */
5571         mapping[i] = count-1;
5572       }
5573     }
5574     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5575     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5576     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5577     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5578     off = 0;
5579     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5580     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5581     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5582     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5583     /* In case the matrix was already created but the user wants to recreate it */
5584     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5585     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5586     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5587     ierr = ISDestroy(&map);CHKERRQ(ierr);
5588     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5589   } else if (reuse == MAT_REUSE_MATRIX) {
5590     /* If the matrix was already created, we simply update the values using the SF objects
5591      * that were attached to the matrix earlier.
5592      *  */
5593     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5594     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5595     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5596     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5597     /* Update values in place */
5598     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5599     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5600     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5601     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5602   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5603   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5604   PetscFunctionReturn(0);
5605 }
5606 
5607 /*@C
5608     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A
5609 
5610     Collective on Mat
5611 
5612    Input Parameters:
5613 +    A - the first matrix in mpiaij format
5614 .    B - the second matrix in mpiaij format
5615 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5616 
5617    Input/Output Parameters:
5618 +    rowb - index set of rows of B to extract (or NULL), modified on output
5619 -    colb - index set of columns of B to extract (or NULL), modified on output
5620 
5621    Output Parameter:
5622 .    B_seq - the sequential matrix generated
5623 
5624     Level: developer
5625 
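   A minimal usage sketch (A and B are assumed to be assembled MATMPIAIJ matrices with compatible layouts; names are illustrative):
.vb
     IS  rowb = NULL, colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... after the values of B change, refresh Bseq reusing the same index sets ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
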
5626 @*/
5627 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5628 {
5629   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5630   PetscErrorCode ierr;
5631   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5632   IS             isrowb,iscolb;
5633   Mat            *bseq=NULL;
5634 
5635   PetscFunctionBegin;
5636   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5637     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5638   }
5639   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5640 
5641   if (scall == MAT_INITIAL_MATRIX) {
5642     start = A->cmap->rstart;
5643     cmap  = a->garray;
5644     nzA   = a->A->cmap->n;
5645     nzB   = a->B->cmap->n;
5646     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5647     ncols = 0;
5648     for (i=0; i<nzB; i++) {  /* row < local row index */
5649       if (cmap[i] < start) idx[ncols++] = cmap[i];
5650       else break;
5651     }
5652     imark = i;
5653     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5654     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5655     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5656     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5657   } else {
5658     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5659     isrowb  = *rowb; iscolb = *colb;
5660     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5661     bseq[0] = *B_seq;
5662   }
5663   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5664   *B_seq = bseq[0];
5665   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5666   if (!rowb) {
5667     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5668   } else {
5669     *rowb = isrowb;
5670   }
5671   if (!colb) {
5672     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5673   } else {
5674     *colb = iscolb;
5675   }
5676   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5677   PetscFunctionReturn(0);
5678 }
5679 
5680 /*
5681     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns
5682     of the OFF-DIAGONAL portion of local A
5683 
5684     Collective on Mat
5685 
5686    Input Parameters:
5687 +    A,B - the matrices in mpiaij format
5688 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5689 
5690    Output Parameter:
5691 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5692 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5693 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5694 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5695 
5696     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5697      for this matrix. This is not desirable.
5698 
5699     Level: developer
5700 
5701 */
5702 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5703 {
5704   PetscErrorCode         ierr;
5705   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5706   Mat_SeqAIJ             *b_oth;
5707   VecScatter             ctx;
5708   MPI_Comm               comm;
5709   const PetscMPIInt      *rprocs,*sprocs;
5710   const PetscInt         *srow,*rstarts,*sstarts;
5711   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5712   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5713   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5714   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5715   PetscMPIInt            size,tag,rank,nreqs;
5716 
5717   PetscFunctionBegin;
5718   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5719   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5720 
5721   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5722     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5723   }
5724   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5725   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5726 
5727   if (size == 1) {
5728     startsj_s = NULL;
5729     bufa_ptr  = NULL;
5730     *B_oth    = NULL;
5731     PetscFunctionReturn(0);
5732   }
5733 
5734   ctx = a->Mvctx;
5735   tag = ((PetscObject)ctx)->tag;
5736 
5737   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5738   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5739   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5740   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5741   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5742   rwaits = reqs;
5743   swaits = reqs + nrecvs;
5744 
5745   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5746   if (scall == MAT_INITIAL_MATRIX) {
5747     /* i-array */
5748     /*---------*/
5749     /*  post receives */
5750     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5751     for (i=0; i<nrecvs; i++) {
5752       rowlen = rvalues + rstarts[i]*rbs;
5753       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5754       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5755     }
5756 
5757     /* pack the outgoing message */
5758     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5759 
5760     sstartsj[0] = 0;
5761     rstartsj[0] = 0;
5762     len         = 0; /* total length of j or a array to be sent */
5763     if (nsends) {
5764       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5765       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5766     }
5767     for (i=0; i<nsends; i++) {
5768       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5769       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5770       for (j=0; j<nrows; j++) {
5771         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5772         for (l=0; l<sbs; l++) {
5773           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5774 
5775           rowlen[j*sbs+l] = ncols;
5776 
5777           len += ncols;
5778           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5779         }
5780         k++;
5781       }
5782       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5783 
5784       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5785     }
5786     /* recvs and sends of i-array are completed */
5787     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5788     ierr = PetscFree(svalues);CHKERRQ(ierr);
5789 
5790     /* allocate buffers for sending j and a arrays */
5791     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5792     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5793 
5794     /* create i-array of B_oth */
5795     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5796 
5797     b_othi[0] = 0;
5798     len       = 0; /* total length of j or a array to be received */
5799     k         = 0;
5800     for (i=0; i<nrecvs; i++) {
5801       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5802       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5803       for (j=0; j<nrows; j++) {
5804         b_othi[k+1] = b_othi[k] + rowlen[j];
5805         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5806         k++;
5807       }
5808       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5809     }
5810     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5811 
5812     /* allocate space for j and a arrays of B_oth */
5813     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5814     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5815 
5816     /* j-array */
5817     /*---------*/
5818     /*  post receives of j-array */
5819     for (i=0; i<nrecvs; i++) {
5820       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5821       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5822     }
5823 
5824     /* pack the outgoing message j-array */
5825     if (nsends) k = sstarts[0];
5826     for (i=0; i<nsends; i++) {
5827       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5828       bufJ  = bufj+sstartsj[i];
5829       for (j=0; j<nrows; j++) {
5830         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5831         for (ll=0; ll<sbs; ll++) {
5832           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5833           for (l=0; l<ncols; l++) {
5834             *bufJ++ = cols[l];
5835           }
5836           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5837         }
5838       }
5839       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5840     }
5841 
5842     /* recvs and sends of j-array are completed */
5843     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5844   } else if (scall == MAT_REUSE_MATRIX) {
5845     sstartsj = *startsj_s;
5846     rstartsj = *startsj_r;
5847     bufa     = *bufa_ptr;
5848     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5849     b_otha   = b_oth->a;
5850 #if defined(PETSC_HAVE_DEVICE)
5851     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5852 #endif
5853   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5854 
5855   /* a-array */
5856   /*---------*/
5857   /*  post receives of a-array */
5858   for (i=0; i<nrecvs; i++) {
5859     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5860     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5861   }
5862 
5863   /* pack the outgoing message a-array */
5864   if (nsends) k = sstarts[0];
5865   for (i=0; i<nsends; i++) {
5866     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5867     bufA  = bufa+sstartsj[i];
5868     for (j=0; j<nrows; j++) {
5869       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5870       for (ll=0; ll<sbs; ll++) {
5871         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5872         for (l=0; l<ncols; l++) {
5873           *bufA++ = vals[l];
5874         }
5875         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5876       }
5877     }
5878     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5879   }
5880   /* recvs and sends of a-array are completed */
5881   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5882   ierr = PetscFree(reqs);CHKERRQ(ierr);
5883 
5884   if (scall == MAT_INITIAL_MATRIX) {
5885     /* put together the new matrix */
5886     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5887 
5888     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5889     /* Since these are PETSc arrays, change flags to free them as necessary. */
5890     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5891     b_oth->free_a  = PETSC_TRUE;
5892     b_oth->free_ij = PETSC_TRUE;
5893     b_oth->nonew   = 0;
5894 
5895     ierr = PetscFree(bufj);CHKERRQ(ierr);
5896     if (!startsj_s || !bufa_ptr) {
5897       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5898       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5899     } else {
5900       *startsj_s = sstartsj;
5901       *startsj_r = rstartsj;
5902       *bufa_ptr  = bufa;
5903     }
5904   }
5905 
5906   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5907   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5908   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5909   PetscFunctionReturn(0);
5910 }
5911 
5912 /*@C
5913   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5914 
5915   Not Collective
5916 
5917   Input Parameter:
5918 . A - The matrix in mpiaij format
5919 
5920   Output Parameters:
5921 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5922 . colmap - A map from global column index to local index into lvec
5923 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5924 
5925   Level: developer
5926 
5927 @*/
5928 #if defined(PETSC_USE_CTABLE)
5929 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5930 #else
5931 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5932 #endif
5933 {
5934   Mat_MPIAIJ *a;
5935 
5936   PetscFunctionBegin;
5937   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5938   PetscValidPointer(lvec, 2);
5939   PetscValidPointer(colmap, 3);
5940   PetscValidPointer(multScatter, 4);
5941   a = (Mat_MPIAIJ*) A->data;
5942   if (lvec) *lvec = a->lvec;
5943   if (colmap) *colmap = a->colmap;
5944   if (multScatter) *multScatter = a->Mvctx;
5945   PetscFunctionReturn(0);
5946 }
5947 
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5951 #if defined(PETSC_HAVE_MKL_SPARSE)
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5953 #endif
5954 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5956 #if defined(PETSC_HAVE_ELEMENTAL)
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5958 #endif
5959 #if defined(PETSC_HAVE_SCALAPACK)
5960 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5961 #endif
5962 #if defined(PETSC_HAVE_HYPRE)
5963 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5964 #endif
5965 #if defined(PETSC_HAVE_CUDA)
5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5967 #endif
5968 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5970 #endif
5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5972 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5973 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5974 
5975 /*
5976     Computes (B'*A')' since computing A*B directly is untenable
5977 
5978                n                       p                          p
5979         [             ]       [             ]         [                 ]
5980       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5981         [             ]       [             ]         [                 ]
5982 
5983 */
5984 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5985 {
5986   PetscErrorCode ierr;
5987   Mat            At,Bt,Ct;
5988 
5989   PetscFunctionBegin;
5990   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5991   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5992   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5993   ierr = MatDestroy(&At);CHKERRQ(ierr);
5994   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5995   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5996   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5997   PetscFunctionReturn(0);
5998 }
5999 
6000 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6001 {
6002   PetscErrorCode ierr;
6003   PetscBool      cisdense;
6004 
6005   PetscFunctionBegin;
6006   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
6007   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6008   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6009   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6010   if (!cisdense) {
6011     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6012   }
6013   ierr = MatSetUp(C);CHKERRQ(ierr);
6014 
6015   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6016   PetscFunctionReturn(0);
6017 }
6018 
6019 /* ----------------------------------------------------------------*/
6020 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6021 {
6022   Mat_Product *product = C->product;
6023   Mat         A = product->A,B=product->B;
6024 
6025   PetscFunctionBegin;
6026   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6027     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6028 
6029   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6030   C->ops->productsymbolic = MatProductSymbolic_AB;
6031   PetscFunctionReturn(0);
6032 }
6033 
6034 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6035 {
6036   PetscErrorCode ierr;
6037   Mat_Product    *product = C->product;
6038 
6039   PetscFunctionBegin;
6040   if (product->type == MATPRODUCT_AB) {
6041     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6042   }
6043   PetscFunctionReturn(0);
6044 }
6045 /* ----------------------------------------------------------------*/
6046 
6047 /*MC
6048    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6049 
6050    Options Database Keys:
6051 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6052 
6053    Level: beginner
6054 
6055    Notes:
6056     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6057     in this case the values associated with the rows and columns one passes in are set to zero
6058     in the matrix
6059 
6060     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6061     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6062 
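    A minimal sketch of the NULL-values usage described above (A is assumed to be an assembled, preallocated MATMPIAIJ matrix; the indices are illustrative):
.vb
     PetscInt row = 0, col = 0;
     ierr = MatSetValues(A,1,&row,1,&col,NULL,INSERT_VALUES);CHKERRQ(ierr); /* sets the entry to zero */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
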
6063 .seealso: MatCreateAIJ()
6064 M*/
6065 
6066 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6067 {
6068   Mat_MPIAIJ     *b;
6069   PetscErrorCode ierr;
6070   PetscMPIInt    size;
6071 
6072   PetscFunctionBegin;
6073   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6074 
6075   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6076   B->data       = (void*)b;
6077   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6078   B->assembled  = PETSC_FALSE;
6079   B->insertmode = NOT_SET_VALUES;
6080   b->size       = size;
6081 
6082   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6083 
6084   /* build cache for off array entries formed */
6085   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6086 
6087   b->donotstash  = PETSC_FALSE;
6088   b->colmap      = NULL;
6089   b->garray      = NULL;
6090   b->roworiented = PETSC_TRUE;
6091 
6092   /* stuff used for matrix vector multiply */
6093   b->lvec  = NULL;
6094   b->Mvctx = NULL;
6095 
6096   /* stuff for MatGetRow() */
6097   b->rowindices   = NULL;
6098   b->rowvalues    = NULL;
6099   b->getrowactive = PETSC_FALSE;
6100 
6101   /* flexible pointer used in CUSPARSE classes */
6102   b->spptr = NULL;
6103 
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6114 #if defined(PETSC_HAVE_CUDA)
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6116 #endif
6117 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6119 #endif
6120 #if defined(PETSC_HAVE_MKL_SPARSE)
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6122 #endif
6123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6124   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6126   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6127 #if defined(PETSC_HAVE_ELEMENTAL)
6128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6129 #endif
6130 #if defined(PETSC_HAVE_SCALAPACK)
6131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6132 #endif
6133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6135 #if defined(PETSC_HAVE_HYPRE)
6136   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6138 #endif
6139   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6140   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6141   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6142   PetscFunctionReturn(0);
6143 }
6144 
6145 /*@C
6146      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6147          and "off-diagonal" part of the matrix in CSR format.
6148 
6149    Collective
6150 
6151    Input Parameters:
6152 +  comm - MPI communicator
6153 .  m - number of local rows (Cannot be PETSC_DECIDE)
6154 .  n - This value should be the same as the local size used in creating the
6155        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
6156        calculated if N is given). For square matrices n is almost always m.
6157 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6158 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6159 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6160 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6161 .   a - matrix values
6162 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6163 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6164 -   oa - matrix values
6165 
6166    Output Parameter:
6167 .   mat - the matrix
6168 
6169    Level: advanced
6170 
6171    Notes:
6172        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6173        must free the arrays once the matrix has been destroyed and not before.
6174 
6175        The i and j indices are 0 based
6176 
6177        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6178 
6179        This sets local rows and cannot be used to set off-processor values.
6180 
6181        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6182        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6183        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6184        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6185        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6186        communication if it is known that only local entries will be set.
6187 
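   Example usage:
       A minimal sketch (not taken from an actual PETSc example); the arrays below are the rank-0 pieces of a
       hypothetical 4x4 matrix distributed over two processes, each owning 2 rows and 2 columns. The diagonal block
       has one entry in each local row (local column indices 0 and 1); the off-diagonal block has a single entry in
       the second local row at global column 3, which is owned by the other process. Rank 1 would pass its own
       analogous arrays in the same (collective) call.
.vb
       PetscInt       di[] = {0,1,2},  dj[] = {0,1};
       PetscScalar    da[] = {1.0,2.0};
       PetscInt       oi[] = {0,0,1},  oj[] = {3};
       PetscScalar    oa[] = {5.0};
       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
       ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
       The six arrays must remain valid until after MatDestroy() has been called.
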
6188 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6189           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6190 @*/
6191 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6192 {
6193   PetscErrorCode ierr;
6194   Mat_MPIAIJ     *maij;
6195 
6196   PetscFunctionBegin;
6197   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6198   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6199   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6200   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6201   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6202   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6203   maij = (Mat_MPIAIJ*) (*mat)->data;
6204 
6205   (*mat)->preallocated = PETSC_TRUE;
6206 
6207   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6208   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6209 
6210   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6211   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6212 
6213   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6214   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6215   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6216   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6217   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6218   PetscFunctionReturn(0);
6219 }
6220 
6221 /*
6222     Special version for direct calls from Fortran
6223 */
6224 #include <petsc/private/fortranimpl.h>
6225 
6226 /* Change these macros so they can be used in a void function */
6227 #undef CHKERRQ
6228 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6229 #undef SETERRQ2
6230 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6231 #undef SETERRQ3
6232 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6233 #undef SETERRQ
6234 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6235 
6236 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6237 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6238 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6239 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6240 #else
6241 #endif
6242 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6243 {
6244   Mat            mat  = *mmat;
6245   PetscInt       m    = *mm, n = *mn;
6246   InsertMode     addv = *maddv;
6247   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6248   PetscScalar    value;
6249   PetscErrorCode ierr;
6250 
6251   MatCheckPreallocated(mat,1);
6252   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6253   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6254   {
6255     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6256     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6257     PetscBool roworiented = aij->roworiented;
6258 
6259     /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
6260     Mat        A                    = aij->A;
6261     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6262     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6263     MatScalar  *aa                  = a->a;
6264     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6265     Mat        B                    = aij->B;
6266     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6267     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6268     MatScalar  *ba                  = b->a;
6269     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6270      * cannot use "#if defined" inside a macro. */
6271     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6272 
6273     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6274     PetscInt  nonew = a->nonew;
6275     MatScalar *ap1,*ap2;
6276 
6277     PetscFunctionBegin;
6278     for (i=0; i<m; i++) {
6279       if (im[i] < 0) continue;
6280       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6281       if (im[i] >= rstart && im[i] < rend) {
6282         row      = im[i] - rstart;
6283         lastcol1 = -1;
6284         rp1      = aj + ai[row];
6285         ap1      = aa + ai[row];
6286         rmax1    = aimax[row];
6287         nrow1    = ailen[row];
6288         low1     = 0;
6289         high1    = nrow1;
6290         lastcol2 = -1;
6291         rp2      = bj + bi[row];
6292         ap2      = ba + bi[row];
6293         rmax2    = bimax[row];
6294         nrow2    = bilen[row];
6295         low2     = 0;
6296         high2    = nrow2;
6297 
6298         for (j=0; j<n; j++) {
6299           if (roworiented) value = v[i*n+j];
6300           else value = v[i+j*m];
6301           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6302           if (in[j] >= cstart && in[j] < cend) {
6303             col = in[j] - cstart;
6304             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6305 #if defined(PETSC_HAVE_DEVICE)
6306             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6307 #endif
6308           } else if (in[j] < 0) continue;
6309           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6310            /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the next 'else' clause */
6311             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6312           } else {
6313             if (mat->was_assembled) {
6314               if (!aij->colmap) {
6315                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6316               }
6317 #if defined(PETSC_USE_CTABLE)
6318               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6319               col--;
6320 #else
6321               col = aij->colmap[in[j]] - 1;
6322 #endif
6323               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6324                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6325                 col  =  in[j];
6326                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6327                 B        = aij->B;
6328                 b        = (Mat_SeqAIJ*)B->data;
6329                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6330                 rp2      = bj + bi[row];
6331                 ap2      = ba + bi[row];
6332                 rmax2    = bimax[row];
6333                 nrow2    = bilen[row];
6334                 low2     = 0;
6335                 high2    = nrow2;
6336                 bm       = aij->B->rmap->n;
6337                 ba       = b->a;
6338                 inserted = PETSC_FALSE;
6339               }
6340             } else col = in[j];
6341             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6342 #if defined(PETSC_HAVE_DEVICE)
6343             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6344 #endif
6345           }
6346         }
6347       } else if (!aij->donotstash) {
6348         if (roworiented) {
6349           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6350         } else {
6351           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6352         }
6353       }
6354     }
6355   }
6356   PetscFunctionReturnVoid();
6357 }
6358 
6359 typedef struct {
6360   Mat       *mp;    /* intermediate products */
6361   PetscBool *mptmp; /* is the intermediate product temporary ? */
6362   PetscInt  cp;     /* number of intermediate products */
6363 
6364   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6365   PetscInt    *startsj_s,*startsj_r;
6366   PetscScalar *bufa;
6367   Mat         P_oth;
6368 
6369   /* may take advantage of merging product->B */
6370   Mat Bloc; /* B-local by merging diag and off-diag */
6371 
6372   /* cusparse does not support splitting the symbolic and numeric phases.
6373      When api_user is true, we do not need to update the numerical values
6374      of the temporary storage */
6375   PetscBool reusesym;
6376 
6377   /* support for COO values insertion */
6378   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6379   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and are used as MPI recv/send buffers, respectively */
6380   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6381   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6382   PetscBool    hasoffproc; /* if true, there is off-process values insertion (e.g. AtB or PtAP) */
6383   PetscSF      sf; /* used for non-local values insertion and memory allocation */
6384 
6385   /* customization */
6386   PetscBool abmerge;
6387   PetscBool P_oth_bind;
6388 } MatMatMPIAIJBACKEND;
6389 
6390 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6391 {
6392   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6393   PetscInt            i;
6394   PetscErrorCode      ierr;
6395 
6396   PetscFunctionBegin;
6397   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6399   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6400   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6401   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6402   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6403   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6404   for (i = 0; i < mmdata->cp; i++) {
6405     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6406   }
6407   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6408   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6409   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6410   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6411   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6412   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6413   PetscFunctionReturn(0);
6414 }
6415 
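/* Copy the entries of A's value array at the positions idx[] (or simply the first n entries when idx is NULL)
   into v[], using a type-specific implementation when one has been composed on A */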
6416 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6417 {
6418   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6419   PetscErrorCode ierr;
6420 
6421   PetscFunctionBegin;
6422   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6423   if (f) {
6424     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6425   } else {
6426     const PetscScalar *vv;
6427 
6428     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6429     if (n && idx) {
6430       PetscScalar    *w = v;
6431       const PetscInt *oi = idx;
6432       PetscInt       j;
6433 
6434       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6435     } else {
6436       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6437     }
6438     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6439   }
6440   PetscFunctionReturn(0);
6441 }
6442 
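/* Numeric phase of the backend product: refresh the gathered P_oth and merged Bloc matrices when needed, run the
   numeric phase of each intermediate product mp[], copy their values into the on-process (coo_v) and off-process
   (coo_w) buffers, gather the off-process contributions, and insert everything into C with MatSetValuesCOO() */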
6443 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6444 {
6445   MatMatMPIAIJBACKEND *mmdata;
6446   PetscInt            i,n_d,n_o;
6447   PetscErrorCode      ierr;
6448 
6449   PetscFunctionBegin;
6450   MatCheckProduct(C,1);
6451   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6452   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6453   if (!mmdata->reusesym) { /* update temporary matrices */
6454     if (mmdata->P_oth) {
6455       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6456     }
6457     if (mmdata->Bloc) {
6458       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6459     }
6460   }
6461   mmdata->reusesym = PETSC_FALSE;
6462 
6463   for (i = 0; i < mmdata->cp; i++) {
6464     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6465     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6466   }
6467   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6468     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6469 
6470     if (mmdata->mptmp[i]) continue;
6471     if (noff) {
6472       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6473 
6474       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6475       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6476       n_o += noff;
6477       n_d += nown;
6478     } else {
6479       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6480 
6481       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6482       n_d += mm->nz;
6483     }
6484   }
6485   if (mmdata->hasoffproc) { /* off-process insertion */
6486     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6487     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6488   }
6489   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6490   PetscFunctionReturn(0);
6491 }
6492 
6493 /* Support for Pt * A, A * P, or Pt * A * P */
6494 #define MAX_NUMBER_INTERMEDIATE 4
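/* Symbolic phase of the backend product: build up to MAX_NUMBER_INTERMEDIATE local (sequential) products mp[] from
   the diagonal and off-diagonal blocks of A and P, record for each of them how its local rows/columns map to global
   rows/columns of C (rmapt/cmapt with rmapa/cmapa), and preallocate C in COO format so that the numeric phase only
   has to copy values */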
6495 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6496 {
6497   Mat_Product            *product = C->product;
6498   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6499   Mat_MPIAIJ             *a,*p;
6500   MatMatMPIAIJBACKEND    *mmdata;
6501   ISLocalToGlobalMapping P_oth_l2g = NULL;
6502   IS                     glob = NULL;
6503   const char             *prefix;
6504   char                   pprefix[256];
6505   const PetscInt         *globidx,*P_oth_idx;
6506   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6507   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6508                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6509                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6510   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6511 
6512   MatProductType         ptype;
6513   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6514   PetscMPIInt            size;
6515   PetscErrorCode         ierr;
6516 
6517   PetscFunctionBegin;
6518   MatCheckProduct(C,1);
6519   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6520   ptype = product->type;
6521   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6522     ptype = MATPRODUCT_AB;
6523     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6524   }
6525   switch (ptype) {
6526   case MATPRODUCT_AB:
6527     A = product->A;
6528     P = product->B;
6529     m = A->rmap->n;
6530     n = P->cmap->n;
6531     M = A->rmap->N;
6532     N = P->cmap->N;
6533     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6534     break;
6535   case MATPRODUCT_AtB:
6536     P = product->A;
6537     A = product->B;
6538     m = P->cmap->n;
6539     n = A->cmap->n;
6540     M = P->cmap->N;
6541     N = A->cmap->N;
6542     hasoffproc = PETSC_TRUE;
6543     break;
6544   case MATPRODUCT_PtAP:
6545     A = product->A;
6546     P = product->B;
6547     m = P->cmap->n;
6548     n = P->cmap->n;
6549     M = P->cmap->N;
6550     N = P->cmap->N;
6551     hasoffproc = PETSC_TRUE;
6552     break;
6553   default:
6554     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6555   }
6556   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6557   if (size == 1) hasoffproc = PETSC_FALSE;
6558 
6559   /* defaults */
6560   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6561     mp[i]    = NULL;
6562     mptmp[i] = PETSC_FALSE;
6563     rmapt[i] = -1;
6564     cmapt[i] = -1;
6565     rmapa[i] = NULL;
6566     cmapa[i] = NULL;
6567   }
6568 
6569   /* customization */
6570   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6571   mmdata->reusesym = product->api_user;
6572   if (ptype == MATPRODUCT_AB) {
6573     if (product->api_user) {
6574       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6575       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6576       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6577       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6578     } else {
6579       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6580       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6581       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6582       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6583     }
6584   } else if (ptype == MATPRODUCT_PtAP) {
6585     if (product->api_user) {
6586       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6587       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6588       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6589     } else {
6590       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6591       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6592       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6593     }
6594   }
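  /* For example, an API user calling MatMatMult() could run with -matmatmult_backend_mergeB to multiply A_diag by
     the merged local part of B (the abmerge branch below) instead of by its diagonal and off-diagonal blocks
     separately; this is only a usage illustration, the default behavior is unchanged */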
6595   a = (Mat_MPIAIJ*)A->data;
6596   p = (Mat_MPIAIJ*)P->data;
6597   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6598   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6599   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6600   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6601   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6602 
6603   cp   = 0;
6604   switch (ptype) {
6605   case MATPRODUCT_AB: /* A * P */
6606     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6607 
6608     /* A_diag * P_local (merged or not) */
6609     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6610       /* P is product->B */
6611       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6612       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6613       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6614       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6615       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6616       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6617       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6618       mp[cp]->product->api_user = product->api_user;
6619       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6620       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6621       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6622       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6623       rmapt[cp] = 1;
6624       cmapt[cp] = 2;
6625       cmapa[cp] = globidx;
6626       mptmp[cp] = PETSC_FALSE;
6627       cp++;
6628     } else { /* A_diag * P_diag and A_diag * P_off */
6629       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6630       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6631       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6632       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6633       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6634       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6635       mp[cp]->product->api_user = product->api_user;
6636       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6637       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6638       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6639       rmapt[cp] = 1;
6640       cmapt[cp] = 1;
6641       mptmp[cp] = PETSC_FALSE;
6642       cp++;
6643       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6644       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6645       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6646       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6647       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6648       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6649       mp[cp]->product->api_user = product->api_user;
6650       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6651       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6652       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6653       rmapt[cp] = 1;
6654       cmapt[cp] = 2;
6655       cmapa[cp] = p->garray;
6656       mptmp[cp] = PETSC_FALSE;
6657       cp++;
6658     }
6659 
6660     /* A_off * P_other */
6661     if (mmdata->P_oth) {
6662       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6663       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6664       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6665       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6666       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6667       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6668       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6669       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6670       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6671       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6672       mp[cp]->product->api_user = product->api_user;
6673       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6674       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6675       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6676       rmapt[cp] = 1;
6677       cmapt[cp] = 2;
6678       cmapa[cp] = P_oth_idx;
6679       mptmp[cp] = PETSC_FALSE;
6680       cp++;
6681     }
6682     break;
6683 
6684   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6685     /* A is product->B */
6686     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6687     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6688       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6689       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6690       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6691       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6692       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6693       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6694       mp[cp]->product->api_user = product->api_user;
6695       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6696       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6697       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6698       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6699       rmapt[cp] = 2;
6700       rmapa[cp] = globidx;
6701       cmapt[cp] = 2;
6702       cmapa[cp] = globidx;
6703       mptmp[cp] = PETSC_FALSE;
6704       cp++;
6705     } else {
6706       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6707       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6708       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6709       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6710       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6711       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6712       mp[cp]->product->api_user = product->api_user;
6713       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6714       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6715       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6716       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6717       rmapt[cp] = 1;
6718       cmapt[cp] = 2;
6719       cmapa[cp] = globidx;
6720       mptmp[cp] = PETSC_FALSE;
6721       cp++;
6722       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6723       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6724       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6725       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6726       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6727       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6728       mp[cp]->product->api_user = product->api_user;
6729       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6730       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6731       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6732       rmapt[cp] = 2;
6733       rmapa[cp] = p->garray;
6734       cmapt[cp] = 2;
6735       cmapa[cp] = globidx;
6736       mptmp[cp] = PETSC_FALSE;
6737       cp++;
6738     }
6739     break;
6740   case MATPRODUCT_PtAP:
6741     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6742     /* P is product->B */
6743     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6744     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6745     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6746     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6747     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6748     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6749     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6750     mp[cp]->product->api_user = product->api_user;
6751     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6752     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6753     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6754     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6755     rmapt[cp] = 2;
6756     rmapa[cp] = globidx;
6757     cmapt[cp] = 2;
6758     cmapa[cp] = globidx;
6759     mptmp[cp] = PETSC_FALSE;
6760     cp++;
6761     if (mmdata->P_oth) {
6762       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6763       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6764       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6765       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6766       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6767       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6768       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6769       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6770       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6771       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6772       mp[cp]->product->api_user = product->api_user;
6773       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6774       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6775       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6776       mptmp[cp] = PETSC_TRUE;
6777       cp++;
6778       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6779       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6780       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6781       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6782       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6783       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6784       mp[cp]->product->api_user = product->api_user;
6785       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6786       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6787       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6788       rmapt[cp] = 2;
6789       rmapa[cp] = globidx;
6790       cmapt[cp] = 2;
6791       cmapa[cp] = P_oth_idx;
6792       mptmp[cp] = PETSC_FALSE;
6793       cp++;
6794     }
6795     break;
6796   default:
6797     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6798   }
6799   /* sanity check */
6800   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6801 
6802   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6803   for (i = 0; i < cp; i++) {
6804     mmdata->mp[i]    = mp[i];
6805     mmdata->mptmp[i] = mptmp[i];
6806   }
6807   mmdata->cp = cp;
6808   C->product->data       = mmdata;
6809   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6810   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6811 
6812   /* memory type */
6813   mmdata->mtype = PETSC_MEMTYPE_HOST;
6814   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6815   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6816   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6817   // enable the line below once MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6818   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6819 
6820   /* prepare coo coordinates for values insertion */
6821 
6822   /* count total nonzeros of those intermediate seqaij Mats
6823     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6824     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6825     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6826   */
6827   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6828     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6829     if (mptmp[cp]) continue;
6830     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
6831       const PetscInt *rmap = rmapa[cp];
6832       const PetscInt mr = mp[cp]->rmap->n;
6833       const PetscInt rs = C->rmap->rstart;
6834       const PetscInt re = C->rmap->rend;
6835       const PetscInt *ii  = mm->i;
6836       for (i = 0; i < mr; i++) {
6837         const PetscInt gr = rmap[i];
6838         const PetscInt nz = ii[i+1] - ii[i];
6839         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6840         else ncoo_oown += nz; /* this row is local */
6841       }
6842     } else ncoo_d += mm->nz;
6843   }
6844 
6845   /*
6846     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6847 
6848     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
6849 
6850     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
6851 
6852     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
6853     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
6854     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6855 
6856     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6857     E.g. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6858   */
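  /* Illustrative sketch with made-up numbers: with cp = 2, if only mp[0] has rows mapping to other processes and it
     contributes 3 off-process and 5 locally owned nonzeros while mp[1] contributes none, then after the gather loop
     below (executed when hasoffproc is true) off[1]-off[0] = 3, off[2]-off[1] = 0, own[1]-own[0] = 5 and
     own[2]-own[1] = 0 */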
6859   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6860   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6861 
6862   /* gather (i,j) of nonzeros inserted by remote procs */
6863   if (hasoffproc) {
6864     PetscSF  msf;
6865     PetscInt ncoo2,*coo_i2,*coo_j2;
6866 
6867     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6868     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6869     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6870 
6871     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6872       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6873       PetscInt   *idxoff = mmdata->off[cp];
6874       PetscInt   *idxown = mmdata->own[cp];
6875       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6876         const PetscInt *rmap = rmapa[cp];
6877         const PetscInt *cmap = cmapa[cp];
6878         const PetscInt *ii  = mm->i;
6879         PetscInt       *coi = coo_i + ncoo_o;
6880         PetscInt       *coj = coo_j + ncoo_o;
6881         const PetscInt mr = mp[cp]->rmap->n;
6882         const PetscInt rs = C->rmap->rstart;
6883         const PetscInt re = C->rmap->rend;
6884         const PetscInt cs = C->cmap->rstart;
6885         for (i = 0; i < mr; i++) {
6886           const PetscInt *jj = mm->j + ii[i];
6887           const PetscInt gr  = rmap[i];
6888           const PetscInt nz  = ii[i+1] - ii[i];
6889           if (gr < rs || gr >= re) { /* this is an offproc row */
6890             for (j = ii[i]; j < ii[i+1]; j++) {
6891               *coi++ = gr;
6892               *idxoff++ = j;
6893             }
6894             if (!cmapt[cp]) { /* already global */
6895               for (j = 0; j < nz; j++) *coj++ = jj[j];
6896             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6897               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6898             } else { /* offdiag */
6899               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6900             }
6901             ncoo_o += nz;
6902           } else { /* this is a local row */
6903             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6904           }
6905         }
6906       }
6907       mmdata->off[cp + 1] = idxoff;
6908       mmdata->own[cp + 1] = idxown;
6909     }
6910 
6911     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6912     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6913     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6914     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6915     ncoo = ncoo_d + ncoo_oown + ncoo2;
6916     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6917     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6918     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6919     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6920     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6921     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6922     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6923     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6924     coo_i = coo_i2;
6925     coo_j = coo_j2;
6926   } else { /* no offproc values insertion */
6927     ncoo = ncoo_d;
6928     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6929 
6930     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6931     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6932     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6933   }
6934   mmdata->hasoffproc = hasoffproc;
6935 
6936   /* gather (i,j) of nonzeros inserted locally */
6937   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6938     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6939     PetscInt       *coi = coo_i + ncoo_d;
6940     PetscInt       *coj = coo_j + ncoo_d;
6941     const PetscInt *jj  = mm->j;
6942     const PetscInt *ii  = mm->i;
6943     const PetscInt *cmap = cmapa[cp];
6944     const PetscInt *rmap = rmapa[cp];
6945     const PetscInt mr = mp[cp]->rmap->n;
6946     const PetscInt rs = C->rmap->rstart;
6947     const PetscInt re = C->rmap->rend;
6948     const PetscInt cs = C->cmap->rstart;
6949 
6950     if (mptmp[cp]) continue;
6951     if (rmapt[cp] == 1) { /* consecutive rows */
6952       /* fill coo_i */
6953       for (i = 0; i < mr; i++) {
6954         const PetscInt gr = i + rs;
6955         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6956       }
6957       /* fill coo_j */
6958       if (!cmapt[cp]) { /* type-0, already global */
6959         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6960       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6961         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6962       } else { /* type-2, local to global for sparse columns */
6963         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6964       }
6965       ncoo_d += mm->nz;
6966     } else if (rmapt[cp] == 2) { /* sparse rows */
6967       for (i = 0; i < mr; i++) {
6968         const PetscInt *jj = mm->j + ii[i];
6969         const PetscInt gr  = rmap[i];
6970         const PetscInt nz  = ii[i+1] - ii[i];
6971         if (gr >= rs && gr < re) { /* local rows */
6972           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6973           if (!cmapt[cp]) { /* type-0, already global */
6974             for (j = 0; j < nz; j++) *coj++ = jj[j];
6975           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6976             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6977           } else { /* type-2, local to global for sparse columns */
6978             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6979           }
6980           ncoo_d += nz;
6981         }
6982       }
6983     }
6984   }
6985   if (glob) {
6986     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6987   }
6988   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6989   if (P_oth_l2g) {
6990     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6991   }
6992   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6993   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6994   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6995 
6996   /* preallocate with COO data */
6997   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6998   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6999   PetscFunctionReturn(0);
7000 }
7001 
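/* Choose the product implementation: use the COO-based backend routines above for AB, AtB, and PtAP; on device
   builds this is done only when A and B have the same type, neither is bound to the CPU, and the user has not
   requested the CPU path through the -mat*_backend_cpu (or -matproduct_*_backend_cpu) options; otherwise fall
   back to MatProductSetFromOptions_MPIAIJ() */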
7002 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7003 {
7004   Mat_Product    *product = mat->product;
7005   PetscErrorCode ierr;
7006 #if defined(PETSC_HAVE_DEVICE)
7007   PetscBool      match = PETSC_FALSE;
7008   PetscBool      usecpu = PETSC_FALSE;
7009 #else
7010   PetscBool      match = PETSC_TRUE;
7011 #endif
7012 
7013   PetscFunctionBegin;
7014   MatCheckProduct(mat,1);
7015 #if defined(PETSC_HAVE_DEVICE)
7016   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7017     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7018   }
7019   if (match) { /* we can always fallback to the CPU if requested */
7020     switch (product->type) {
7021     case MATPRODUCT_AB:
7022       if (product->api_user) {
7023         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7024         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7025         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7026       } else {
7027         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7028         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7029         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7030       }
7031       break;
7032     case MATPRODUCT_AtB:
7033       if (product->api_user) {
7034         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7035         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7036         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7037       } else {
7038         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7039         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7040         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7041       }
7042       break;
7043     case MATPRODUCT_PtAP:
7044       if (product->api_user) {
7045         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7046         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7047         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7048       } else {
7049         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7050         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7051         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7052       }
7053       break;
7054     default:
7055       break;
7056     }
7057     match = (PetscBool)!usecpu;
7058   }
7059 #endif
7060   if (match) {
7061     switch (product->type) {
7062     case MATPRODUCT_AB:
7063     case MATPRODUCT_AtB:
7064     case MATPRODUCT_PtAP:
7065       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7066       break;
7067     default:
7068       break;
7069     }
7070   }
7071   /* fallback to MPIAIJ ops */
7072   if (!mat->ops->productsymbolic) {
7073     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7074   }
7075   PetscFunctionReturn(0);
7076 }
7077