xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5fedec97950c19de564efaecd0f125b1a6cb2b20)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
23    switches over to use inode routines when enough inodes exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
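/*
   Example: a minimal usage sketch of the recommendation above (the global sizes M,N and the per-row
   nonzero estimates d_nz and o_nz are placeholders chosen by the caller):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);            (used when the communicator has one process)
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);  (used when it has more than one)
*/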
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from that of the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
90 {
91   PetscErrorCode  ierr;
92   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
93   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
94   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
95   const PetscInt  *ia,*ib;
96   const MatScalar *aa,*bb,*aav,*bav;
97   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
98   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
99 
100   PetscFunctionBegin;
101   *keptrows = NULL;
102 
103   ia   = a->i;
104   ib   = b->i;
105   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
106   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
107   for (i=0; i<m; i++) {
108     na = ia[i+1] - ia[i];
109     nb = ib[i+1] - ib[i];
110     if (!na && !nb) {
111       cnt++;
112       goto ok1;
113     }
114     aa = aav + ia[i];
115     for (j=0; j<na; j++) {
116       if (aa[j] != 0.0) goto ok1;
117     }
118     bb = bav + ib[i];
119     for (j=0; j <nb; j++) {
120       if (bb[j] != 0.0) goto ok1;
121     }
122     cnt++;
123 ok1:;
124   }
125   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
126   if (!n0rows) {
127     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
128     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
129     PetscFunctionReturn(0);
130   }
131   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
132   cnt  = 0;
133   for (i=0; i<m; i++) {
134     na = ia[i+1] - ia[i];
135     nb = ib[i+1] - ib[i];
136     if (!na && !nb) continue;
137     aa = aav + ia[i];
138     for (j=0; j<na;j++) {
139       if (aa[j] != 0.0) {
140         rows[cnt++] = rstart + i;
141         goto ok2;
142       }
143     }
144     bb = bav + ib[i];
145     for (j=0; j<nb; j++) {
146       if (bb[j] != 0.0) {
147         rows[cnt++] = rstart + i;
148         goto ok2;
149       }
150     }
151 ok2:;
152   }
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
154   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
155   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
191 {
192   PetscErrorCode    ierr;
193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
194   PetscInt          i,m,n,*garray = aij->garray;
195   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
196   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
197   PetscReal         *work;
198   const PetscScalar *dummy;
199 
200   PetscFunctionBegin;
201   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
202   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
203   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
204   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
205   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
206   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
207   if (type == NORM_2) {
208     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
209       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
210     }
211     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
212       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
213     }
214   } else if (type == NORM_1) {
215     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
216       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
217     }
218     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
219       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
220     }
221   } else if (type == NORM_INFINITY) {
222     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
223       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
224     }
225     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
226       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
227     }
228   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
229     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
230       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
231     }
232     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
233       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
234     }
235   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
236     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
237       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
238     }
239     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
240       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
241     }
242   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
243   if (type == NORM_INFINITY) {
244     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
245   } else {
246     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
247   }
248   ierr = PetscFree(work);CHKERRQ(ierr);
249   if (type == NORM_2) {
250     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
251   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
252     for (i=0; i<n; i++) reductions[i] /= m;
253   }
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 /*
290   Local utility routine that creates a mapping from the global column
291 number to the local number in the off-diagonal part of the local
292 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
293 a slightly higher hash table cost; without it, it is not scalable (each process
294 stores an order-N integer array) but access is fast.
295 */
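/*
   A brief sketch of how the resulting map is consulted later (cf. MatSetValues_MPIAIJ()), where gcol is a
   global column index that lands in the off-diagonal block:

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                     (entries are stored 1-based so that 0 can mean "not present")
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   lcol is then the local column index into aij->B, or negative if gcol does not (yet) occur in B.
*/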
296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
297 {
298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
299   PetscErrorCode ierr;
300   PetscInt       n = aij->B->cmap->n,i;
301 
302   PetscFunctionBegin;
303   if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
304 #if defined(PETSC_USE_CTABLE)
305   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
306   for (i=0; i<n; i++) {
307     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
308   }
309 #else
310   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
311   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
312   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
313 #endif
314   PetscFunctionReturn(0);
315 }
316 
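/*
   MatSetValues_SeqAIJ_A_Private() and its companion MatSetValues_SeqAIJ_B_Private() below insert or add a
   single (row,col,value) entry into the diagonal (A) respectively off-diagonal (B) SeqAIJ block. Each first
   narrows the search window with a short binary search over the sorted column indices of the row, then scans
   linearly; if the column is not yet present and new nonzeros are allowed, the row is reallocated if necessary
   and the later entries are shifted up to make room. Both macros rely on the local variables set up in
   MatSetValues_MPIAIJ().
*/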
317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
318 { \
319     if (col <= lastcol1)  low1 = 0;     \
320     else                 high1 = nrow1; \
321     lastcol1 = col;\
322     while (high1-low1 > 5) { \
323       t = (low1+high1)/2; \
324       if (rp1[t] > col) high1 = t; \
325       else              low1  = t; \
326     } \
327       for (_i=low1; _i<high1; _i++) { \
328         if (rp1[_i] > col) break; \
329         if (rp1[_i] == col) { \
330           if (addv == ADD_VALUES) { \
331             ap1[_i] += value;   \
332             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
333             (void)PetscLogFlops(1.0);   \
334            } \
335           else                    ap1[_i] = value; \
336           goto a_noinsert; \
337         } \
338       }  \
339       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
340       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
341       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
342       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
343       N = nrow1++ - 1; a->nz++; high1++; \
344       /* shift up all the later entries in this row */ \
345       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
346       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
347       rp1[_i] = col;  \
348       ap1[_i] = value;  \
349       A->nonzerostate++;\
350       a_noinsert: ; \
351       ailen[row] = nrow1; \
352 }
353 
354 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
355   { \
356     if (col <= lastcol2) low2 = 0;                        \
357     else high2 = nrow2;                                   \
358     lastcol2 = col;                                       \
359     while (high2-low2 > 5) {                              \
360       t = (low2+high2)/2;                                 \
361       if (rp2[t] > col) high2 = t;                        \
362       else             low2  = t;                         \
363     }                                                     \
364     for (_i=low2; _i<high2; _i++) {                       \
365       if (rp2[_i] > col) break;                           \
366       if (rp2[_i] == col) {                               \
367         if (addv == ADD_VALUES) {                         \
368           ap2[_i] += value;                               \
369           (void)PetscLogFlops(1.0);                       \
370         }                                                 \
371         else                    ap2[_i] = value;          \
372         goto b_noinsert;                                  \
373       }                                                   \
374     }                                                     \
375     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
376     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
377     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
378     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
379     N = nrow2++ - 1; b->nz++; high2++;                    \
380     /* shift up all the later entries in this row */      \
381     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
382     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
383     rp2[_i] = col;                                        \
384     ap2[_i] = value;                                      \
385     B->nonzerostate++;                                    \
386     b_noinsert: ;                                         \
387     bilen[row] = nrow2;                                   \
388   }
389 
390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
391 {
392   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
393   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
394   PetscErrorCode ierr;
395   PetscInt       l,*garray = mat->garray,diag;
396   PetscScalar    *aa,*ba;
397 
398   PetscFunctionBegin;
399   /* code only works for square matrices A */
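  /* v is expected to contain the entire row in ascending global column order: first the off-diagonal entries
     to the left of the diagonal block, then the diagonal-block entries, then the remaining off-diagonal entries */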
400 
401   /* find size of row to the left of the diagonal part */
402   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
403   row  = row - diag;
404   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
405     if (garray[b->j[b->i[row]+l]] > diag) break;
406   }
407   if (l) {
408     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
409     ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
410     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
411   }
412 
413   /* diagonal part */
414   if (a->i[row+1]-a->i[row]) {
415     ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
416     ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
417     ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
418   }
419 
420   /* right of diagonal part */
421   if (b->i[row+1]-b->i[row]-l) {
422     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
423     ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
424     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
430 {
431   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
432   PetscScalar    value = 0.0;
433   PetscErrorCode ierr;
434   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
435   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
436   PetscBool      roworiented = aij->roworiented;
437 
438   /* Some Variables required in the macro */
439   Mat        A                    = aij->A;
440   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
441   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
442   PetscBool  ignorezeroentries    = a->ignorezeroentries;
443   Mat        B                    = aij->B;
444   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
445   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
446   MatScalar  *aa,*ba;
447   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
448   PetscInt   nonew;
449   MatScalar  *ap1,*ap2;
450 
451   PetscFunctionBegin;
452   ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
453   ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j=0; j<n; j++) {
475         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
481         } else if (in[j] < 0) continue;
482         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
510               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
511                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
512               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
513             }
514           } else col = in[j];
515           nonew = b->nonew;
516           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
517         }
518       }
519     } else {
520       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
521       if (!aij->donotstash) {
522         mat->assembled = PETSC_FALSE;
523         if (roworiented) {
524           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
532   ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 /*
537     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
538     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
539     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
540 */
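/*
   For example, with cstart = 4 and cend = 8, a row whose sorted global columns in mat_j are {1,5,7,9}
   contributes {5-4,7-4} = {1,3} to the diagonal block's aj and {1,9} to the off-diagonal block's bj
   (off-diagonal column indices are kept global at this stage).
*/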
541 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
542 {
543   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
544   Mat            A           = aij->A; /* diagonal part of the matrix */
545   Mat            B           = aij->B; /* offdiagonal part of the matrix */
546   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
548   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
549   PetscInt       *ailen      = a->ilen,*aj = a->j;
550   PetscInt       *bilen      = b->ilen,*bj = b->j;
551   PetscInt       am          = aij->A->rmap->n,j;
552   PetscInt       diag_so_far = 0,dnz;
553   PetscInt       offd_so_far = 0,onz;
554 
555   PetscFunctionBegin;
556   /* Iterate over all rows of the matrix */
557   for (j=0; j<am; j++) {
558     dnz = onz = 0;
559     /*  Iterate over all non-zero columns of the current row */
560     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
561       /* If column is in the diagonal */
562       if (mat_j[col] >= cstart && mat_j[col] < cend) {
563         aj[diag_so_far++] = mat_j[col] - cstart;
564         dnz++;
565       } else { /* off-diagonal entries */
566         bj[offd_so_far++] = mat_j[col];
567         onz++;
568       }
569     }
570     ailen[j] = dnz;
571     bilen[j] = onz;
572   }
573   PetscFunctionReturn(0);
574 }
575 
576 /*
577     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
578     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
579     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
580     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
581     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
582 */
583 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
584 {
585   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
586   Mat            A      = aij->A; /* diagonal part of the matrix */
587   Mat            B      = aij->B; /* offdiagonal part of the matrix */
588   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
589   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
590   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
591   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
592   PetscInt       *ailen = a->ilen,*aj = a->j;
593   PetscInt       *bilen = b->ilen,*bj = b->j;
594   PetscInt       am     = aij->A->rmap->n,j;
595   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
596   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
597   PetscScalar    *aa = a->a,*ba = b->a;
598 
599   PetscFunctionBegin;
600   /* Iterate over all rows of the matrix */
601   for (j=0; j<am; j++) {
602     dnz_row = onz_row = 0;
603     rowstart_offd = full_offd_i[j];
604     rowstart_diag = full_diag_i[j];
605     /*  Iterate over all non-zero columns of the current row */
606     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
607       /* If column is in the diagonal */
608       if (mat_j[col] >= cstart && mat_j[col] < cend) {
609         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
610         aa[rowstart_diag+dnz_row] = mat_a[col];
611         dnz_row++;
612       } else { /* off-diagonal entries */
613         bj[rowstart_offd+onz_row] = mat_j[col];
614         ba[rowstart_offd+onz_row] = mat_a[col];
615         onz_row++;
616       }
617     }
618     ailen[j] = dnz_row;
619     bilen[j] = onz_row;
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
625 {
626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
627   PetscErrorCode ierr;
628   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
629   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
630 
631   PetscFunctionBegin;
632   for (i=0; i<m; i++) {
633     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
634     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
635     if (idxm[i] >= rstart && idxm[i] < rend) {
636       row = idxm[i] - rstart;
637       for (j=0; j<n; j++) {
638         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
639         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
640         if (idxn[j] >= cstart && idxn[j] < cend) {
641           col  = idxn[j] - cstart;
642           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
643         } else {
644           if (!aij->colmap) {
645             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
646           }
647 #if defined(PETSC_USE_CTABLE)
648           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
649           col--;
650 #else
651           col = aij->colmap[idxn[j]] - 1;
652 #endif
653           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
654           else {
655             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
656           }
657         }
658       }
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
665 {
666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
667   PetscErrorCode ierr;
668   PetscInt       nstash,reallocs;
669 
670   PetscFunctionBegin;
671   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
672 
673   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
680 {
681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
682   PetscErrorCode ierr;
683   PetscMPIInt    n;
684   PetscInt       i,j,rstart,ncols,flg;
685   PetscInt       *row,*col;
686   PetscBool      other_disassembled;
687   PetscScalar    *val;
688 
689   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
690 
691   PetscFunctionBegin;
692   if (!aij->donotstash && !mat->nooffprocentries) {
693     while (1) {
694       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
695       if (!flg) break;
696 
697       for (i=0; i<n;) {
698         /* Now identify the consecutive vals belonging to the same row */
699         for (j=i,rstart=row[j]; j<n; j++) {
700           if (row[j] != rstart) break;
701         }
702         if (j < n) ncols = j-i;
703         else       ncols = n-i;
704         /* Now assemble all these values with a single function call */
705         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
706         i    = j;
707       }
708     }
709     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
710   }
711 #if defined(PETSC_HAVE_DEVICE)
712   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
713   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
714   if (mat->boundtocpu) {
715     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
716     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
717   }
718 #endif
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled; if so we must
723      also disassemble ourselves so that we may reassemble. */
724   /*
725      if the nonzero structure of the submatrix B cannot change then we know that
726      no processor disassembled, so we can skip this step
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
730     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738 #if defined(PETSC_HAVE_DEVICE)
739   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
740 #endif
741   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
742   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
743 
744   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
745 
746   aij->rowvalues = NULL;
747 
748   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
749 
750   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
751   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
752     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
753     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
754   }
755 #if defined(PETSC_HAVE_DEVICE)
756   mat->offloadmask = PETSC_OFFLOAD_BOTH;
757 #endif
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
773 {
774   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
775   PetscObjectState sA, sB;
776   PetscInt        *lrows;
777   PetscInt         r, len;
778   PetscBool        cong, lch, gch;
779   PetscErrorCode   ierr;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
784   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
803     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
824     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
834     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
835   }
836   ierr = PetscFree(lrows);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscErrorCode    ierr;
851   PetscMPIInt       n = A->rmap->n;
852   PetscInt          i,j,r,m,len = 0;
853   PetscInt          *lrows,*owners = A->rmap->range;
854   PetscMPIInt       p = 0;
855   PetscSFNode       *rrows;
856   PetscSF           sf;
857   const PetscScalar *xx;
858   PetscScalar       *bb,*mask,*aij_a;
859   Vec               xmask,lmask;
860   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
861   const PetscInt    *aj, *ii,*ridx;
862   PetscScalar       *aa;
863 
864   PetscFunctionBegin;
865   /* Create SF where leaves are input rows and roots are owned rows */
866   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
867   for (r = 0; r < n; ++r) lrows[r] = -1;
868   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
869   for (r = 0; r < N; ++r) {
870     const PetscInt idx   = rows[r];
871     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
872     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
873       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
874     }
875     rrows[r].rank  = p;
876     rrows[r].index = rows[r] - owners[p];
877   }
878   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
879   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
880   /* Collect flags for rows to be zeroed */
881   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
882   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
883   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
884   /* Compress and put in row numbers */
885   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
886   /* zero diagonal part of matrix */
887   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
888   /* handle off diagonal part of matrix */
889   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
890   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
891   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
892   for (i=0; i<len; i++) bb[lrows[i]] = 1;
893   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
894   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
897   if (x && b) { /* this code is buggy when the row and column layouts don't match */
898     PetscBool cong;
899 
900     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
901     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
902     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
905     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
906   }
907   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
908   /* remove zeroed rows of the off-diagonal matrix */
909   ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
913   }
914   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij_a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij_a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x && b) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
955   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
956   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
957   ierr = PetscFree(lrows);CHKERRQ(ierr);
958 
959   /* only change matrix nonzero state if pattern was allowed to be changed */
960   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
961     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
962     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971   PetscInt       nt;
972   VecScatter     Mvctx = a->Mvctx;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
977   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   VecScatter     Mvctx = a->Mvctx;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   /* do nondiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   /* do local part */
1017   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1018   /* add partial results together */
1019   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1025 {
1026   MPI_Comm       comm;
1027   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1028   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1029   IS             Me,Notme;
1030   PetscErrorCode ierr;
1031   PetscInt       M,N,first,last,*notme,i;
1032   PetscBool      lf;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1039   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   /* do nondiagonal part */
1082   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1083   /* do local part */
1084   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1085   /* add partial results together */
1086   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /*
1092   This only works correctly for square matrices where the subblock A->A is the
1093    diagonal block
1094 */
1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1102   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1103   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1114   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1119 {
1120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124 #if defined(PETSC_USE_LOG)
1125   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1126 #endif
1127   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1128   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1129   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1130   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1131 #if defined(PETSC_USE_CTABLE)
1132   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1133 #else
1134   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1135 #endif
1136   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1137   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1139   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1140   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1141   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1142 
1143   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1144   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1145 
1146   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1147   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1156 #if defined(PETSC_HAVE_CUDA)
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1158 #endif
1159 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1161 #endif
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1163 #if defined(PETSC_HAVE_ELEMENTAL)
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1165 #endif
1166 #if defined(PETSC_HAVE_SCALAPACK)
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1168 #endif
1169 #if defined(PETSC_HAVE_HYPRE)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1172 #endif
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1179 #if defined(PETSC_HAVE_MKL_SPARSE)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1189 {
1190   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1191   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1192   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1193   const PetscInt    *garray = aij->garray;
1194   const PetscScalar *aa,*ba;
1195   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1196   PetscInt          *rowlens;
1197   PetscInt          *colidxs;
1198   PetscScalar       *matvals;
1199   PetscErrorCode    ierr;
1200 
1201   PetscFunctionBegin;
1202   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1203 
1204   M  = mat->rmap->N;
1205   N  = mat->cmap->N;
1206   m  = mat->rmap->n;
1207   rs = mat->rmap->rstart;
1208   cs = mat->cmap->rstart;
1209   nz = A->nz + B->nz;
1210 
1211   /* write matrix header */
1212   header[0] = MAT_FILE_CLASSID;
1213   header[1] = M; header[2] = N; header[3] = nz;
1214   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1215   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1216 
1217   /* fill in and store row lengths  */
1218   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1219   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1220   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1221   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1222 
1223   /* fill in and store column indices */
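  /* columns are emitted in ascending global order for each row: off-diagonal (B) columns below the diagonal
     block first, then the diagonal (A) block shifted by the column start cs, then the remaining B columns */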
1224   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1225   for (cnt=0, i=0; i<m; i++) {
1226     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       colidxs[cnt++] = garray[B->j[jb]];
1229     }
1230     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1231       colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb<B->i[i+1]; jb++)
1233       colidxs[cnt++] = garray[B->j[jb]];
1234   }
1235   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1236   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1237   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1238 
1239   /* fill in and store nonzero values */
1240   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1241   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1242   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1243   for (cnt=0, i=0; i<m; i++) {
1244     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1245       if (garray[B->j[jb]] > cs) break;
1246       matvals[cnt++] = ba[jb];
1247     }
1248     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1249       matvals[cnt++] = aa[ja];
1250     for (; jb<B->i[i+1]; jb++)
1251       matvals[cnt++] = ba[jb];
1252   }
1253   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1254   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1255   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1256   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1257   ierr = PetscFree(matvals);CHKERRQ(ierr);
1258 
1259   /* write block size option to the viewer's .info file */
1260   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
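/* A minimal usage sketch (the file name is illustrative): the binary path above is what MatView()
   reaches for a parallel MATMPIAIJ matrix with a PETSCVIEWERBINARY viewer, and the resulting file
   can be read back with MatLoad():

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/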
1263 
1264 #include <petscdraw.h>
1265 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1266 {
1267   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1268   PetscErrorCode    ierr;
1269   PetscMPIInt       rank = aij->rank,size = aij->size;
1270   PetscBool         isdraw,iascii,isbinary;
1271   PetscViewer       sviewer;
1272   PetscViewerFormat format;
1273 
1274   PetscFunctionBegin;
1275   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1276   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1277   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1278   if (iascii) {
1279     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1280     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1281       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1282       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1283       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1284       for (i=0; i<(PetscInt)size; i++) {
1285         nmax = PetscMax(nmax,nz[i]);
1286         nmin = PetscMin(nmin,nz[i]);
1287         navg += nz[i];
1288       }
1289       ierr = PetscFree(nz);CHKERRQ(ierr);
1290       navg = navg/size;
1291       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1292       PetscFunctionReturn(0);
1293     }
1294     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1295     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1296       MatInfo   info;
1297       PetscInt *inodes=NULL;
1298 
1299       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1300       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1302       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1303       if (!inodes) {
1304         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1305                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1306       } else {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1309       }
1310       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1312       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1313       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1314       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1315       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1317       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1318       PetscFunctionReturn(0);
1319     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1320       PetscInt inodecount,inodelimit,*inodes;
1321       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1322       if (inodes) {
1323         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1324       } else {
1325         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1326       }
1327       PetscFunctionReturn(0);
1328     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1329       PetscFunctionReturn(0);
1330     }
1331   } else if (isbinary) {
1332     if (size == 1) {
1333       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1334       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1335     } else {
1336       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1337     }
1338     PetscFunctionReturn(0);
1339   } else if (iascii && size == 1) {
1340     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1341     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1342     PetscFunctionReturn(0);
1343   } else if (isdraw) {
1344     PetscDraw draw;
1345     PetscBool isnull;
1346     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1347     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1348     if (isnull) PetscFunctionReturn(0);
1349   }
1350 
1351   { /* assemble the entire matrix onto first processor */
1352     Mat A = NULL, Av;
1353     IS  isrow,iscol;
1354 
1355     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1356     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1357     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1358     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1359 /*  The commented code uses MatCreateSubMatrices instead */
1360 /*
1361     Mat *AA, A = NULL, Av;
1362     IS  isrow,iscol;
1363 
1364     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1365     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1366     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1367     if (rank == 0) {
1368        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1369        A    = AA[0];
1370        Av   = AA[0];
1371     }
1372     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1373 */
1374     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1375     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1376     /*
1377        Everyone has to call to draw the matrix since the graphics waits are
1378        synchronized across all processors that share the PetscDraw object
1379     */
1380     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1381     if (rank == 0) {
1382       if (((PetscObject)mat)->name) {
1383         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1384       }
1385       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1386     }
1387     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1388     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1389     ierr = MatDestroy(&A);CHKERRQ(ierr);
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1395 {
1396   PetscErrorCode ierr;
1397   PetscBool      iascii,isdraw,issocket,isbinary;
1398 
1399   PetscFunctionBegin;
1400   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1401   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1402   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1404   if (iascii || isdraw || isbinary || issocket) {
1405     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscErrorCode ierr;
1414   Vec            bb1 = NULL;
1415   PetscBool      hasop;
1416 
1417   PetscFunctionBegin;
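  /* Only the "local" SOR variants are supported in parallel: each outer iteration scatters the
     current solution into the ghost vector lvec, folds the off-diagonal coupling into the
     right-hand side as bb1 = bb - B*lvec, and runs the sequential SOR kernel of the diagonal
     block A on bb1; a truly coupled parallel SOR is not implemented.  An illustrative call is
     MatSOR(matin,bb,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,1,xx). */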
1418   if (flag == SOR_APPLY_UPPER) {
1419     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1420     PetscFunctionReturn(0);
1421   }
1422 
1423   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1424     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1425   }
1426 
1427   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1428     if (flag & SOR_ZERO_INITIAL_GUESS) {
1429       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1430       its--;
1431     }
1432 
1433     while (its--) {
1434       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1435       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1436 
1437       /* update rhs: bb1 = bb - B*x */
1438       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1439       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1440 
1441       /* local sweep */
1442       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1443     }
1444   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1445     if (flag & SOR_ZERO_INITIAL_GUESS) {
1446       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1447       its--;
1448     }
1449     while (its--) {
1450       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1451       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1452 
1453       /* update rhs: bb1 = bb - B*x */
1454       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1455       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1456 
1457       /* local sweep */
1458       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1459     }
1460   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1461     if (flag & SOR_ZERO_INITIAL_GUESS) {
1462       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1463       its--;
1464     }
1465     while (its--) {
1466       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1467       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1468 
1469       /* update rhs: bb1 = bb - B*x */
1470       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1471       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1472 
1473       /* local sweep */
1474       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1475     }
1476   } else if (flag & SOR_EISENSTAT) {
1477     Vec xx1;
1478 
1479     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1481 
1482     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1483     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1484     if (!mat->diag) {
1485       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1486       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1487     }
1488     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1489     if (hasop) {
1490       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1491     } else {
1492       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1493     }
1494     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1495 
1496     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1497 
1498     /* local sweep */
1499     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1500     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1501     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1502   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1503 
1504   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1505 
1506   matin->factorerrortype = mat->A->factorerrortype;
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1511 {
1512   Mat            aA,aB,Aperm;
1513   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1514   PetscScalar    *aa,*ba;
1515   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1516   PetscSF        rowsf,sf;
1517   IS             parcolp = NULL;
1518   PetscBool      done;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
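  /* Overall strategy: star forests are used to invert the row and column permutations (each rank
     scatters its current global indices to the ranks owning the permuted positions), the ghost
     columns of the off-diagonal block are translated the same way, the diagonal/off-diagonal
     nonzero counts of every permuted row are computed and broadcast to the destination rows for
     preallocation, and the values are finally inserted with MatSetValues(). */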
1522   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1523   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1524   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1525   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1526 
1527   /* Invert row permutation to find out where my rows should go */
1528   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1529   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1530   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1531   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1532   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1533   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1534 
1535   /* Invert column permutation to find out where my columns should go */
1536   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1537   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1538   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1539   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1540   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1541   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1542   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1543 
1544   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1545   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1546   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1547 
1548   /* Find out where my gcols should go */
1549   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1550   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1551   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1552   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1553   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1554   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1555   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1556   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1557 
1558   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1559   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1560   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1561   for (i=0; i<m; i++) {
1562     PetscInt    row = rdest[i];
1563     PetscMPIInt rowner;
1564     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1565     for (j=ai[i]; j<ai[i+1]; j++) {
1566       PetscInt    col = cdest[aj[j]];
1567       PetscMPIInt cowner;
1568       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1569       if (rowner == cowner) dnnz[i]++;
1570       else onnz[i]++;
1571     }
1572     for (j=bi[i]; j<bi[i+1]; j++) {
1573       PetscInt    col = gcdest[bj[j]];
1574       PetscMPIInt cowner;
1575       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1576       if (rowner == cowner) dnnz[i]++;
1577       else onnz[i]++;
1578     }
1579   }
1580   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1581   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1582   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1583   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1585 
1586   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1587   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1588   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) {
1590     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1591     PetscInt j0,rowlen;
1592     rowlen = ai[i+1] - ai[i];
1593     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1594       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1595       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1596     }
1597     rowlen = bi[i+1] - bi[i];
1598     for (j0=j=0; j<rowlen; j0=j) {
1599       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1600       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1601     }
1602   }
1603   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1606   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1607   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1608   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1609   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1610   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1611   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1612   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1613   *B = Aperm;
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1618 {
1619   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1620   PetscErrorCode ierr;
1621 
1622   PetscFunctionBegin;
1623   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1624   if (ghosts) *ghosts = aij->garray;
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1629 {
1630   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1631   Mat            A    = mat->A,B = mat->B;
1632   PetscErrorCode ierr;
1633   PetscLogDouble isend[5],irecv[5];
1634 
1635   PetscFunctionBegin;
1636   info->block_size = 1.0;
1637   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1638 
1639   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1640   isend[3] = info->memory;  isend[4] = info->mallocs;
1641 
1642   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1643 
1644   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1645   isend[3] += info->memory;  isend[4] += info->mallocs;
1646   if (flag == MAT_LOCAL) {
1647     info->nz_used      = isend[0];
1648     info->nz_allocated = isend[1];
1649     info->nz_unneeded  = isend[2];
1650     info->memory       = isend[3];
1651     info->mallocs      = isend[4];
1652   } else if (flag == MAT_GLOBAL_MAX) {
1653     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1654 
1655     info->nz_used      = irecv[0];
1656     info->nz_allocated = irecv[1];
1657     info->nz_unneeded  = irecv[2];
1658     info->memory       = irecv[3];
1659     info->mallocs      = irecv[4];
1660   } else if (flag == MAT_GLOBAL_SUM) {
1661     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1662 
1663     info->nz_used      = irecv[0];
1664     info->nz_allocated = irecv[1];
1665     info->nz_unneeded  = irecv[2];
1666     info->memory       = irecv[3];
1667     info->mallocs      = irecv[4];
1668   }
1669   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1670   info->fill_ratio_needed = 0;
1671   info->factor_mallocs    = 0;
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1676 {
1677   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   switch (op) {
1682   case MAT_NEW_NONZERO_LOCATIONS:
1683   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1684   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1685   case MAT_KEEP_NONZERO_PATTERN:
1686   case MAT_NEW_NONZERO_LOCATION_ERR:
1687   case MAT_USE_INODES:
1688   case MAT_IGNORE_ZERO_ENTRIES:
1689   case MAT_FORM_EXPLICIT_TRANSPOSE:
1690     MatCheckPreallocated(A,1);
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1693     break;
1694   case MAT_ROW_ORIENTED:
1695     MatCheckPreallocated(A,1);
1696     a->roworiented = flg;
1697 
1698     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1699     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1700     break;
1701   case MAT_FORCE_DIAGONAL_ENTRIES:
1702   case MAT_SORTED_FULL:
1703     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1704     break;
1705   case MAT_IGNORE_OFF_PROC_ENTRIES:
1706     a->donotstash = flg;
1707     break;
1708   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1709   case MAT_SPD:
1710   case MAT_SYMMETRIC:
1711   case MAT_STRUCTURALLY_SYMMETRIC:
1712   case MAT_HERMITIAN:
1713   case MAT_SYMMETRY_ETERNAL:
1714     break;
1715   case MAT_SUBMAT_SINGLEIS:
1716     A->submat_singleis = flg;
1717     break;
1718   case MAT_STRUCTURE_ONLY:
1719     /* The option is handled directly by MatSetOption() */
1720     break;
1721   default:
1722     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1728 {
1729   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1730   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1731   PetscErrorCode ierr;
1732   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1733   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1734   PetscInt       *cmap,*idx_p;
1735 
1736   PetscFunctionBegin;
1737   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1738   mat->getrowactive = PETSC_TRUE;
1739 
1740   if (!mat->rowvalues && (idx || v)) {
1741     /*
1742         allocate enough space to hold information from the longest row.
1743     */
1744     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1745     PetscInt   max = 1,tmp;
1746     for (i=0; i<matin->rmap->n; i++) {
1747       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1748       if (max < tmp) max = tmp;
1749     }
1750     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1751   }
1752 
1753   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1754   lrow = row - rstart;
1755 
1756   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1757   if (!v)   {pvA = NULL; pvB = NULL;}
1758   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1759   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1760   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1761   nztot = nzA + nzB;
1762 
1763   cmap = mat->garray;
1764   if (v  || idx) {
1765     if (nztot) {
1766       /* Sort by increasing column numbers, assuming A and B already sorted */
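      /* imark counts the off-diagonal (B) entries whose global column lies before cstart; those
         are copied first, then the nzA diagonal-block entries, then the remaining B entries, so
         the merged row comes out in ascending global column order. */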
1767       PetscInt imark = -1;
1768       if (v) {
1769         *v = v_p = mat->rowvalues;
1770         for (i=0; i<nzB; i++) {
1771           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1772           else break;
1773         }
1774         imark = i;
1775         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1776         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1777       }
1778       if (idx) {
1779         *idx = idx_p = mat->rowindices;
1780         if (imark > -1) {
1781           for (i=0; i<imark; i++) {
1782             idx_p[i] = cmap[cworkB[i]];
1783           }
1784         } else {
1785           for (i=0; i<nzB; i++) {
1786             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1787             else break;
1788           }
1789           imark = i;
1790         }
1791         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1792         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1793       }
1794     } else {
1795       if (idx) *idx = NULL;
1796       if (v)   *v   = NULL;
1797     }
1798   }
1799   *nz  = nztot;
1800   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1801   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1806 {
1807   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1808 
1809   PetscFunctionBegin;
1810   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1811   aij->getrowactive = PETSC_FALSE;
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1816 {
1817   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1818   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1819   PetscErrorCode  ierr;
1820   PetscInt        i,j,cstart = mat->cmap->rstart;
1821   PetscReal       sum = 0.0;
1822   const MatScalar *v,*amata,*bmata;
1823 
1824   PetscFunctionBegin;
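  /* NORM_FROBENIUS: sqrt of the global sum of |a_ij|^2 over both local blocks.
     NORM_1:         max over global columns j of sum_i |a_ij| (column sums combined with an MPI sum).
     NORM_INFINITY:  max over local rows i of sum_j |a_ij|, combined with an MPI max. */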
1825   if (aij->size == 1) {
1826     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1827   } else {
1828     ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr);
1829     ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1830     if (type == NORM_FROBENIUS) {
1831       v = amata;
1832       for (i=0; i<amat->nz; i++) {
1833         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1834       }
1835       v = bmata;
1836       for (i=0; i<bmat->nz; i++) {
1837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1838       }
1839       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1840       *norm = PetscSqrtReal(*norm);
1841       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1842     } else if (type == NORM_1) { /* max column norm */
1843       PetscReal *tmp,*tmp2;
1844       PetscInt  *jj,*garray = aij->garray;
1845       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1846       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1847       *norm = 0.0;
1848       v     = amata; jj = amat->j;
1849       for (j=0; j<amat->nz; j++) {
1850         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1851       }
1852       v = bmata; jj = bmat->j;
1853       for (j=0; j<bmat->nz; j++) {
1854         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1855       }
1856       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1857       for (j=0; j<mat->cmap->N; j++) {
1858         if (tmp2[j] > *norm) *norm = tmp2[j];
1859       }
1860       ierr = PetscFree(tmp);CHKERRQ(ierr);
1861       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1862       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1863     } else if (type == NORM_INFINITY) { /* max row norm */
1864       PetscReal ntemp = 0.0;
1865       for (j=0; j<aij->A->rmap->n; j++) {
1866         v   = amata + amat->i[j];
1867         sum = 0.0;
1868         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v); v++;
1870         }
1871         v = bmata + bmat->i[j];
1872         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1873           sum += PetscAbsScalar(*v); v++;
1874         }
1875         if (sum > ntemp) ntemp = sum;
1876       }
1877       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1878       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1879     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1880     ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr);
1881     ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1882   }
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1887 {
1888   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1889   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1890   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1891   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1892   PetscErrorCode  ierr;
1893   Mat             B,A_diag,*B_diag;
1894   const MatScalar *pbv,*bv;
1895 
1896   PetscFunctionBegin;
1897   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1898   ai = Aloc->i; aj = Aloc->j;
1899   bi = Bloc->i; bj = Bloc->j;
1900   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1901     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1902     PetscSFNode          *oloc;
1903     PETSC_UNUSED PetscSF sf;
1904 
1905     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1906     /* compute d_nnz for preallocation */
1907     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1908     for (i=0; i<ai[ma]; i++) {
1909       d_nnz[aj[i]]++;
1910     }
1911     /* compute local off-diagonal contributions */
1912     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1913     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1916     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1917     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1918     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1919     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1920     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1921     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1922 
1923     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1924     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1925     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1926     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1927     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1928     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1929   } else {
1930     B    = *matout;
1931     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1932   }
1933 
1934   b           = (Mat_MPIAIJ*)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i=0; i<A_diag_ncol; i++) {
1944     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1945   }
1946 
1947   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1948      very quickly (i.e., without using MatSetValues()) because all writes are local. */
1949   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1950 
1951   /* copy over the B part */
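  /* Each local row i of the off-diagonal block becomes part of one global column of the transpose:
     its ncol global column indices are passed as the row indices of B and the single global row
     index `row` as the column, i.e. MatSetValues() is called with the roles of rows and columns
     swapped. */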
1952   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1953   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1954   pbv  = bv;
1955   row  = A->rmap->rstart;
1956   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1957   cols_tmp = cols;
1958   for (i=0; i<mb; i++) {
1959     ncol = bi[i+1]-bi[i];
1960     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1961     row++;
1962     pbv += ncol; cols_tmp += ncol;
1963   }
1964   ierr = PetscFree(cols);CHKERRQ(ierr);
1965   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1966 
1967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1973   }
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1978 {
1979   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1980   Mat            a    = aij->A,b = aij->B;
1981   PetscErrorCode ierr;
1982   PetscInt       s1,s2,s3;
1983 
1984   PetscFunctionBegin;
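  /* Computes mat = diag(ll) * mat * diag(rr).  Right-scaling the off-diagonal block needs the
     off-process entries of rr, so the scatter into lvec is started first and that block is scaled
     only after the scatter completes, overlapping communication with the local scaling. */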
1985   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1986   if (rr) {
1987     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1988     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1989     /* Overlap communication with computation. */
1990     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1991   }
1992   if (ll) {
1993     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1994     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1995     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1996   }
1997   /* scale  the diagonal block */
1998   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1999 
2000   if (rr) {
2001     /* Do a scatter end and then right scale the off-diagonal block */
2002     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2003     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2004   }
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2009 {
2010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2011   PetscErrorCode ierr;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2019 {
2020   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2021   Mat            a,b,c,d;
2022   PetscBool      flg;
2023   PetscErrorCode ierr;
2024 
2025   PetscFunctionBegin;
2026   a = matA->A; b = matA->B;
2027   c = matB->A; d = matB->B;
2028 
2029   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2030   if (flg) {
2031     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2032   }
2033   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2038 {
2039   PetscErrorCode ierr;
2040   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2041   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2042 
2043   PetscFunctionBegin;
2044   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2045   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2046     /* Because of the column compression in the off-process part of the matrix a->B,
2047        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2048        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2049        could be provided by first uncompressing the a->B matrices and then copying the
2050        submatrices */
2051     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2052   } else {
2053     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2054     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2055   }
2056   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2061 {
2062   PetscErrorCode ierr;
2063 
2064   PetscFunctionBegin;
2065   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 /*
2070    Computes the number of nonzeros per row needed for preallocation when X and Y
2071    have different nonzero structure.
2072 */
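/* For example (hypothetical data): if a row of X has global columns {0,3,7} and the same row of Y
   has global columns {3,5}, the merge below counts the union {0,3,5,7} and sets nnz for that row
   to 4. */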
2073 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2074 {
2075   PetscInt       i,j,k,nzx,nzy;
2076 
2077   PetscFunctionBegin;
2078   /* Set the number of nonzeros in the new matrix */
2079   for (i=0; i<m; i++) {
2080     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2081     nzx = xi[i+1] - xi[i];
2082     nzy = yi[i+1] - yi[i];
2083     nnz[i] = 0;
2084     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2085       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2086       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2087       nnz[i]++;
2088     }
2089     for (; k<nzy; k++) nnz[i]++;
2090   }
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2095 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2096 {
2097   PetscErrorCode ierr;
2098   PetscInt       m = Y->rmap->N;
2099   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2100   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2101 
2102   PetscFunctionBegin;
2103   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2108 {
2109   PetscErrorCode ierr;
2110   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2111 
2112   PetscFunctionBegin;
2113   if (str == SAME_NONZERO_PATTERN) {
2114     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2115     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2116   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2117     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2118   } else {
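    /* Different nonzero patterns: build a fresh matrix B whose preallocation is the union of the
       two patterns (computed separately for the diagonal blocks and for the off-diagonal blocks in
       global column numbering), perform the addition with the basic kernel, then replace Y's
       internals with B via MatHeaderMerge(). */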
2119     Mat      B;
2120     PetscInt *nnz_d,*nnz_o;
2121 
2122     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2123     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2124     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2125     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2126     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2127     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2128     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2129     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2130     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2131     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2132     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
2133     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2134     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2135   }
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2140 
2141 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2142 {
2143 #if defined(PETSC_USE_COMPLEX)
2144   PetscErrorCode ierr;
2145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2146 
2147   PetscFunctionBegin;
2148   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2149   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2150 #else
2151   PetscFunctionBegin;
2152 #endif
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2157 {
2158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2163   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2174   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2179 {
2180   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2181   PetscErrorCode    ierr;
2182   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2183   PetscScalar       *va,*vv;
2184   Vec               vB,vA;
2185   const PetscScalar *vb;
2186 
2187   PetscFunctionBegin;
2188   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2189   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2190 
2191   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2192   if (idx) {
2193     for (i=0; i<m; i++) {
2194       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2195     }
2196   }
2197 
2198   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2199   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2200   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2201 
2202   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2203   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2204   for (i=0; i<m; i++) {
2205     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2206       vv[i] = vb[i];
2207       if (idx) idx[i] = a->garray[idxb[i]];
2208     } else {
2209       vv[i] = va[i];
2210       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2211         idx[i] = a->garray[idxb[i]];
2212     }
2213   }
2214   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2215   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2216   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2217   ierr = PetscFree(idxb);CHKERRQ(ierr);
2218   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2219   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2224 {
2225   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2226   PetscInt          m = A->rmap->n,n = A->cmap->n;
2227   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2228   PetscInt          *cmap  = mat->garray;
2229   PetscInt          *diagIdx, *offdiagIdx;
2230   Vec               diagV, offdiagV;
2231   PetscScalar       *a, *diagA, *offdiagA;
2232   const PetscScalar *ba,*bav;
2233   PetscInt          r,j,col,ncols,*bi,*bj;
2234   PetscErrorCode    ierr;
2235   Mat               B = mat->B;
2236   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2237 
2238   PetscFunctionBegin;
2239   /* Special case: one process holds the entire matrix A and the other processes have no entries */
2240   if (A->cmap->N == n) {
2241     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2242     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2243     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2244     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2245     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2246     PetscFunctionReturn(0);
2247   } else if (n == 0) {
2248     if (m) {
2249       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2250       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2251       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2252     }
2253     PetscFunctionReturn(0);
2254   }
2255 
2256   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2257   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2258   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2259   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2260 
2261   /* Get offdiagIdx[] for implicit 0.0 */
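  /* The off-diagonal block B stores only the columns that contain nonzeros, remapped through
     cmap[] (= garray) to global numbering.  Any global column outside the diagonal block that is
     missing from cmap[] is an implicit 0.0; the search below records the global index of the first
     such hole so it can be reported when 0.0 turns out to be the row minimum in magnitude. */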
2262   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2263   ba   = bav;
2264   bi   = b->i;
2265   bj   = b->j;
2266   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2267   for (r = 0; r < m; r++) {
2268     ncols = bi[r+1] - bi[r];
2269     if (ncols == A->cmap->N - n) { /* Brow is dense */
2270       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2271     } else { /* Brow is sparse, so the minimum in magnitude is already known to be 0.0 (there is at least one implicit zero entry) */
2272       offdiagA[r] = 0.0;
2273 
2274       /* Find first hole in the cmap */
2275       for (j=0; j<ncols; j++) {
2276         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2277         if (col > j && j < cstart) {
2278           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2279           break;
2280         } else if (col > j + n && j >= cstart) {
2281           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2282           break;
2283         }
2284       }
2285       if (j == ncols && ncols < A->cmap->N - n) {
2286         /* a hole is outside compressed Bcols */
2287         if (ncols == 0) {
2288           if (cstart) {
2289             offdiagIdx[r] = 0;
2290           } else offdiagIdx[r] = cend;
2291         } else { /* ncols > 0 */
2292           offdiagIdx[r] = cmap[ncols-1] + 1;
2293           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2294         }
2295       }
2296     }
2297 
2298     for (j=0; j<ncols; j++) {
2299       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2300       ba++; bj++;
2301     }
2302   }
2303 
2304   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2305   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2306   for (r = 0; r < m; ++r) {
2307     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2308       a[r]   = diagA[r];
2309       if (idx) idx[r] = cstart + diagIdx[r];
2310     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2311       a[r] = diagA[r];
2312       if (idx) {
2313         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2314           idx[r] = cstart + diagIdx[r];
2315         } else idx[r] = offdiagIdx[r];
2316       }
2317     } else {
2318       a[r]   = offdiagA[r];
2319       if (idx) idx[r] = offdiagIdx[r];
2320     }
2321   }
2322   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2323   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2324   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2325   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2326   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2327   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2328   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2333 {
2334   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2335   PetscInt          m = A->rmap->n,n = A->cmap->n;
2336   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2337   PetscInt          *cmap  = mat->garray;
2338   PetscInt          *diagIdx, *offdiagIdx;
2339   Vec               diagV, offdiagV;
2340   PetscScalar       *a, *diagA, *offdiagA;
2341   const PetscScalar *ba,*bav;
2342   PetscInt          r,j,col,ncols,*bi,*bj;
2343   PetscErrorCode    ierr;
2344   Mat               B = mat->B;
2345   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2346 
2347   PetscFunctionBegin;
2348   /* Special case: one process holds the entire matrix A and the other processes have no entries */
2349   if (A->cmap->N == n) {
2350     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2351     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2352     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2353     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2354     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2355     PetscFunctionReturn(0);
2356   } else if (n == 0) {
2357     if (m) {
2358       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2359       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2360       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2361     }
2362     PetscFunctionReturn(0);
2363   }
2364 
2365   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2366   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2367   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2368   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2369 
2370   /* Get offdiagIdx[] for implicit 0.0 */
2371   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2372   ba   = bav;
2373   bi   = b->i;
2374   bj   = b->j;
2375   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2376   for (r = 0; r < m; r++) {
2377     ncols = bi[r+1] - bi[r];
2378     if (ncols == A->cmap->N - n) { /* Brow is dense */
2379       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2380     } else { /* Brow is sparse, so the row minimum over B is already known to be at most 0.0 (there is at least one implicit zero entry) */
2381       offdiagA[r] = 0.0;
2382 
2383       /* Find first hole in the cmap */
2384       for (j=0; j<ncols; j++) {
2385         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2386         if (col > j && j < cstart) {
2387           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2388           break;
2389         } else if (col > j + n && j >= cstart) {
2390           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2391           break;
2392         }
2393       }
2394       if (j == ncols && ncols < A->cmap->N - n) {
2395         /* a hole is outside compressed Bcols */
2396         if (ncols == 0) {
2397           if (cstart) {
2398             offdiagIdx[r] = 0;
2399           } else offdiagIdx[r] = cend;
2400         } else { /* ncols > 0 */
2401           offdiagIdx[r] = cmap[ncols-1] + 1;
2402           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2403         }
2404       }
2405     }
2406 
2407     for (j=0; j<ncols; j++) {
2408       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2409       ba++; bj++;
2410     }
2411   }
2412 
2413   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2414   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2415   for (r = 0; r < m; ++r) {
2416     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2417       a[r]   = diagA[r];
2418       if (idx) idx[r] = cstart + diagIdx[r];
2419     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2420       a[r] = diagA[r];
2421       if (idx) {
2422         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2423           idx[r] = cstart + diagIdx[r];
2424         } else idx[r] = offdiagIdx[r];
2425       }
2426     } else {
2427       a[r]   = offdiagA[r];
2428       if (idx) idx[r] = offdiagIdx[r];
2429     }
2430   }
2431   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2432   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2433   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2434   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2435   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2436   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2437   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2442 {
2443   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2444   PetscInt          m = A->rmap->n,n = A->cmap->n;
2445   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2446   PetscInt          *cmap  = mat->garray;
2447   PetscInt          *diagIdx, *offdiagIdx;
2448   Vec               diagV, offdiagV;
2449   PetscScalar       *a, *diagA, *offdiagA;
2450   const PetscScalar *ba,*bav;
2451   PetscInt          r,j,col,ncols,*bi,*bj;
2452   PetscErrorCode    ierr;
2453   Mat               B = mat->B;
2454   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2455 
2456   PetscFunctionBegin;
2457   /* Special case: one process holds the entire matrix A and the other processes have no entries */
2458   if (A->cmap->N == n) {
2459     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2460     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2461     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2462     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2463     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2464     PetscFunctionReturn(0);
2465   } else if (n == 0) {
2466     if (m) {
2467       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2468       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2469       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2470     }
2471     PetscFunctionReturn(0);
2472   }
2473 
2474   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2476   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2477   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2478 
2479   /* Get offdiagIdx[] for implicit 0.0 */
2480   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2481   ba   = bav;
2482   bi   = b->i;
2483   bj   = b->j;
2484   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2485   for (r = 0; r < m; r++) {
2486     ncols = bi[r+1] - bi[r];
2487     if (ncols == A->cmap->N - n) { /* Brow is dense */
2488       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2489     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2490       offdiagA[r] = 0.0;
2491 
2492       /* Find first hole in the cmap */
2493       for (j=0; j<ncols; j++) {
2494         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2495         if (col > j && j < cstart) {
2496           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2497           break;
2498         } else if (col > j + n && j >= cstart) {
2499           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2500           break;
2501         }
2502       }
2503       if (j == ncols && ncols < A->cmap->N - n) {
2504         /* a hole is outside compressed Bcols */
2505         if (ncols == 0) {
2506           if (cstart) {
2507             offdiagIdx[r] = 0;
2508           } else offdiagIdx[r] = cend;
2509         } else { /* ncols > 0 */
2510           offdiagIdx[r] = cmap[ncols-1] + 1;
2511           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2512         }
2513       }
2514     }
2515 
2516     for (j=0; j<ncols; j++) {
2517       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2518       ba++; bj++;
2519     }
2520   }
2521 
2522   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2523   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2524   for (r = 0; r < m; ++r) {
2525     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2526       a[r] = diagA[r];
2527       if (idx) idx[r] = cstart + diagIdx[r];
2528     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2529       a[r] = diagA[r];
2530       if (idx) {
2531         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2532           idx[r] = cstart + diagIdx[r];
2533         } else idx[r] = offdiagIdx[r];
2534       }
2535     } else {
2536       a[r] = offdiagA[r];
2537       if (idx) idx[r] = offdiagIdx[r];
2538     }
2539   }
2540   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2541   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2542   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2543   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2544   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2545   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2546   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2551 {
2552   PetscErrorCode ierr;
2553   Mat            *dummy;
2554 
2555   PetscFunctionBegin;
2556   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2557   *newmat = *dummy;
2558   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2563 {
2564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2565   PetscErrorCode ierr;
2566 
2567   PetscFunctionBegin;
2568   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2569   A->factorerrortype = a->A->factorerrortype;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2574 {
2575   PetscErrorCode ierr;
2576   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2577 
2578   PetscFunctionBegin;
2579   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2580   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2581   if (x->assembled) {
2582     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2583   } else {
2584     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2585   }
2586   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2592 {
2593   PetscFunctionBegin;
2594   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2595   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 /*@
2600    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2601 
2602    Collective on Mat
2603 
2604    Input Parameters:
2605 +    A - the matrix
2606 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2607 
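   Options Database Keys:
. -mat_increase_overlap_scalable - use a scalable algorithm to compute the overlap (processed by MatSetFromOptions_MPIAIJ() below)

   Example usage (an illustrative sketch; A is assumed to be an assembled MATMPIAIJ matrix and nis, is, ov are the usual MatIncreaseOverlap() arguments):
.vb
   MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
   MatIncreaseOverlap(A,nis,is,ov);
.ve
   After the first call, the second one dispatches to MatIncreaseOverlap_MPIAIJ_Scalable().
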
2608  Level: advanced
2609 
2610 @*/
2611 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2612 {
2613   PetscErrorCode       ierr;
2614 
2615   PetscFunctionBegin;
2616   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2621 {
2622   PetscErrorCode       ierr;
2623   PetscBool            sc = PETSC_FALSE,flg;
2624 
2625   PetscFunctionBegin;
2626   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2627   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2628   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2629   if (flg) {
2630     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2631   }
2632   ierr = PetscOptionsTail();CHKERRQ(ierr);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2637 {
2638   PetscErrorCode ierr;
2639   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2640   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2641 
2642   PetscFunctionBegin;
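  /* Make sure each local row can hold at least one entry so that MatShift_Basic() below can insert the diagonal shift */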
2643   if (!Y->preallocated) {
2644     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2645   } else if (!aij->nz) {
2646     PetscInt nonew = aij->nonew;
2647     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2648     aij->nonew = nonew;
2649   }
2650   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2655 {
2656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2661   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2662   if (d) {
2663     PetscInt rstart;
2664     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
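    /* convert the local row index of the first missing diagonal entry to a global row index */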
2665     *d += rstart;
2666 
2667   }
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2672 {
2673   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2674   PetscErrorCode ierr;
2675 
2676   PetscFunctionBegin;
2677   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 /* -------------------------------------------------------------------*/
2682 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2683                                        MatGetRow_MPIAIJ,
2684                                        MatRestoreRow_MPIAIJ,
2685                                        MatMult_MPIAIJ,
2686                                 /* 4*/ MatMultAdd_MPIAIJ,
2687                                        MatMultTranspose_MPIAIJ,
2688                                        MatMultTransposeAdd_MPIAIJ,
2689                                        NULL,
2690                                        NULL,
2691                                        NULL,
2692                                 /*10*/ NULL,
2693                                        NULL,
2694                                        NULL,
2695                                        MatSOR_MPIAIJ,
2696                                        MatTranspose_MPIAIJ,
2697                                 /*15*/ MatGetInfo_MPIAIJ,
2698                                        MatEqual_MPIAIJ,
2699                                        MatGetDiagonal_MPIAIJ,
2700                                        MatDiagonalScale_MPIAIJ,
2701                                        MatNorm_MPIAIJ,
2702                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2703                                        MatAssemblyEnd_MPIAIJ,
2704                                        MatSetOption_MPIAIJ,
2705                                        MatZeroEntries_MPIAIJ,
2706                                 /*24*/ MatZeroRows_MPIAIJ,
2707                                        NULL,
2708                                        NULL,
2709                                        NULL,
2710                                        NULL,
2711                                 /*29*/ MatSetUp_MPIAIJ,
2712                                        NULL,
2713                                        NULL,
2714                                        MatGetDiagonalBlock_MPIAIJ,
2715                                        NULL,
2716                                 /*34*/ MatDuplicate_MPIAIJ,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                 /*39*/ MatAXPY_MPIAIJ,
2722                                        MatCreateSubMatrices_MPIAIJ,
2723                                        MatIncreaseOverlap_MPIAIJ,
2724                                        MatGetValues_MPIAIJ,
2725                                        MatCopy_MPIAIJ,
2726                                 /*44*/ MatGetRowMax_MPIAIJ,
2727                                        MatScale_MPIAIJ,
2728                                        MatShift_MPIAIJ,
2729                                        MatDiagonalSet_MPIAIJ,
2730                                        MatZeroRowsColumns_MPIAIJ,
2731                                 /*49*/ MatSetRandom_MPIAIJ,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                        NULL,
2736                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2737                                        NULL,
2738                                        MatSetUnfactored_MPIAIJ,
2739                                        MatPermute_MPIAIJ,
2740                                        NULL,
2741                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2742                                        MatDestroy_MPIAIJ,
2743                                        MatView_MPIAIJ,
2744                                        NULL,
2745                                        NULL,
2746                                 /*64*/ NULL,
2747                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2752                                        MatGetRowMinAbs_MPIAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                 /*75*/ MatFDColoringApply_AIJ,
2758                                        MatSetFromOptions_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        MatFindZeroDiagonals_MPIAIJ,
2762                                 /*80*/ NULL,
2763                                        NULL,
2764                                        NULL,
2765                                 /*83*/ MatLoad_MPIAIJ,
2766                                        MatIsSymmetric_MPIAIJ,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                 /*89*/ NULL,
2772                                        NULL,
2773                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2774                                        NULL,
2775                                        NULL,
2776                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2777                                        NULL,
2778                                        NULL,
2779                                        NULL,
2780                                        MatBindToCPU_MPIAIJ,
2781                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2782                                        NULL,
2783                                        NULL,
2784                                        MatConjugate_MPIAIJ,
2785                                        NULL,
2786                                 /*104*/MatSetValuesRow_MPIAIJ,
2787                                        MatRealPart_MPIAIJ,
2788                                        MatImaginaryPart_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                 /*109*/NULL,
2792                                        NULL,
2793                                        MatGetRowMin_MPIAIJ,
2794                                        NULL,
2795                                        MatMissingDiagonal_MPIAIJ,
2796                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2797                                        NULL,
2798                                        MatGetGhosts_MPIAIJ,
2799                                        NULL,
2800                                        NULL,
2801                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        MatGetMultiProcBlock_MPIAIJ,
2806                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2807                                        MatGetColumnReductions_MPIAIJ,
2808                                        MatInvertBlockDiagonal_MPIAIJ,
2809                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2810                                        MatCreateSubMatricesMPI_MPIAIJ,
2811                                 /*129*/NULL,
2812                                        NULL,
2813                                        NULL,
2814                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2815                                        NULL,
2816                                 /*134*/NULL,
2817                                        NULL,
2818                                        NULL,
2819                                        NULL,
2820                                        NULL,
2821                                 /*139*/MatSetBlockSizes_MPIAIJ,
2822                                        NULL,
2823                                        NULL,
2824                                        MatFDColoringSetUp_MPIXAIJ,
2825                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2826                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2827                                 /*145*/NULL,
2828                                        NULL,
2829                                        NULL
2830 };
2831 
2832 /* ----------------------------------------------------------------------------------------*/
2833 
2834 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2835 {
2836   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2841   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2842   PetscFunctionReturn(0);
2843 }
2844 
2845 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2846 {
2847   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2852   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
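/* Typical user-level calls that reach this routine through MatMPIAIJSetPreallocation() (an illustrative
   sketch; m, n and the nonzero counts 5 and 2 are placeholders):

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/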
2856 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2857 {
2858   Mat_MPIAIJ     *b;
2859   PetscErrorCode ierr;
2860   PetscMPIInt    size;
2861 
2862   PetscFunctionBegin;
2863   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2864   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2865   b = (Mat_MPIAIJ*)B->data;
2866 
2867 #if defined(PETSC_USE_CTABLE)
2868   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2869 #else
2870   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2871 #endif
2872   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2873   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2874   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2875 
2876   /* Because B will have been resized, we simply destroy it and create a new one each time */
2877   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2878   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2879   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2880   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2881   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2882   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2883   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2884 
2885   if (!B->preallocated) {
2886     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2887     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2888     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2889     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2890     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2891   }
2892 
2893   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2894   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2895   B->preallocated  = PETSC_TRUE;
2896   B->was_assembled = PETSC_FALSE;
2897   B->assembled     = PETSC_FALSE;
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2902 {
2903   Mat_MPIAIJ     *b;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2908   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2909   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2910   b = (Mat_MPIAIJ*)B->data;
2911 
2912 #if defined(PETSC_USE_CTABLE)
2913   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2914 #else
2915   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2916 #endif
2917   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2918   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2919   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2920 
2921   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2922   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2923   B->preallocated  = PETSC_TRUE;
2924   B->was_assembled = PETSC_FALSE;
2925   B->assembled = PETSC_FALSE;
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2930 {
2931   Mat            mat;
2932   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2933   PetscErrorCode ierr;
2934 
2935   PetscFunctionBegin;
2936   *newmat = NULL;
2937   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2938   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2939   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2940   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2941   a       = (Mat_MPIAIJ*)mat->data;
2942 
2943   mat->factortype   = matin->factortype;
2944   mat->assembled    = matin->assembled;
2945   mat->insertmode   = NOT_SET_VALUES;
2946   mat->preallocated = matin->preallocated;
2947 
2948   a->size         = oldmat->size;
2949   a->rank         = oldmat->rank;
2950   a->donotstash   = oldmat->donotstash;
2951   a->roworiented  = oldmat->roworiented;
2952   a->rowindices   = NULL;
2953   a->rowvalues    = NULL;
2954   a->getrowactive = PETSC_FALSE;
2955 
2956   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2958 
2959   if (oldmat->colmap) {
2960 #if defined(PETSC_USE_CTABLE)
2961     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2962 #else
2963     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2964     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2965     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2966 #endif
2967   } else a->colmap = NULL;
2968   if (oldmat->garray) {
2969     PetscInt len;
2970     len  = oldmat->B->cmap->n;
2971     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2972     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2973     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2974   } else a->garray = NULL;
2975 
2976   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2977      in fact, MatDuplicate() only requires the matrix to be preallocated.
2978      This can happen, for example, inside DMCreateMatrix_Shell() */
2979   if (oldmat->lvec) {
2980     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2981     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2982   }
2983   if (oldmat->Mvctx) {
2984     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2985     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2986   }
2987   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2988   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2989   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2990   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2991   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2992   *newmat = mat;
2993   PetscFunctionReturn(0);
2994 }
2995 
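/* A typical calling sequence that ends up here through MatLoad() (an illustrative sketch; "matrix.dat" is a
   placeholder file name and error checking is omitted):

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/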
2996 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2997 {
2998   PetscBool      isbinary, ishdf5;
2999   PetscErrorCode ierr;
3000 
3001   PetscFunctionBegin;
3002   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3003   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3004   /* force binary viewer to load .info file if it has not yet done so */
3005   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3007   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3008   if (isbinary) {
3009     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3010   } else if (ishdf5) {
3011 #if defined(PETSC_HAVE_HDF5)
3012     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3013 #else
3014     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3015 #endif
3016   } else {
3017     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3018   }
3019   PetscFunctionReturn(0);
3020 }
3021 
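/* Sketch of the binary file layout as it is read below:
     header[4]      = {MAT_FILE_CLASSID, M, N, nz}   (PetscInt)
     row lengths    : M PetscInt values, one per global row
     column indices : nz PetscInt values
     matrix values  : nz PetscScalar values */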
3022 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3023 {
3024   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3025   PetscInt       *rowidxs,*colidxs;
3026   PetscScalar    *matvals;
3027   PetscErrorCode ierr;
3028 
3029   PetscFunctionBegin;
3030   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3031 
3032   /* read in matrix header */
3033   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3034   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3035   M  = header[1]; N = header[2]; nz = header[3];
3036   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3037   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3038   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3039 
3040   /* set block sizes from the viewer's .info file */
3041   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3042   /* set global sizes if not set already */
3043   if (mat->rmap->N < 0) mat->rmap->N = M;
3044   if (mat->cmap->N < 0) mat->cmap->N = N;
3045   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3046   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3047 
3048   /* check if the matrix sizes are correct */
3049   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3050   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3051 
3052   /* read in row lengths and build row indices */
3053   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3054   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3055   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3056   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3057   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3058   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3059   /* read in column indices and matrix values */
3060   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3061   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3062   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3063   /* store matrix indices and values */
3064   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3065   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3066   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 /* Not scalable because of ISAllGather() unless getting all columns. */
3071 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3072 {
3073   PetscErrorCode ierr;
3074   IS             iscol_local;
3075   PetscBool      isstride;
3076   PetscMPIInt    lisstride=0,gisstride;
3077 
3078   PetscFunctionBegin;
3079   /* check if we are grabbing all columns */
3080   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3081 
3082   if (isstride) {
3083     PetscInt  start,len,mstart,mlen;
3084     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3085     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3086     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3087     if (mstart == start && mlen-mstart == len) lisstride = 1;
3088   }
3089 
3090   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3091   if (gisstride) {
3092     PetscInt N;
3093     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3094     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3095     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3096     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3097   } else {
3098     PetscInt cbs;
3099     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3100     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3101     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3102   }
3103 
3104   *isseq = iscol_local;
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 /*
3109  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3110  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3111 
3112  Input Parameters:
3113    mat - matrix
3114    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3115            i.e., mat->rstart <= isrow[i] < mat->rend
3116    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3117            i.e., mat->cstart <= iscol[i] < mat->cend
3118  Output Parameters:
3119    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3120    iscol_o - sequential column index set for retrieving mat->B
3121    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3122  */
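/* Illustrative example (hypothetical data): if the concatenated iscol over all processes selects global
   columns {0,3,7,9} and this process's off-diagonal block B has compressed columns whose global column
   numbers are {3,9}, then iscol_o selects those two local columns of B and garray = {1,3}, the positions
   of columns 3 and 9 within iscol (i.e. their column numbers in the submatrix) */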
3123 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3124 {
3125   PetscErrorCode ierr;
3126   Vec            x,cmap;
3127   const PetscInt *is_idx;
3128   PetscScalar    *xarray,*cmaparray;
3129   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3130   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3131   Mat            B=a->B;
3132   Vec            lvec=a->lvec,lcmap;
3133   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3134   MPI_Comm       comm;
3135   VecScatter     Mvctx=a->Mvctx;
3136 
3137   PetscFunctionBegin;
3138   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3139   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3140 
3141   /* (1) iscol selects a subset of mat's columns; record them in a full-length vector x that is padded with -1 elsewhere */
3142   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3143   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3144   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3145   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3146 
3147   /* Get start indices */
3148   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3149   isstart -= ncols;
3150   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3151 
3152   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3153   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3154   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3155   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3156   for (i=0; i<ncols; i++) {
3157     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3158     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3159     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3160   }
3161   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3162   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3163   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3164 
3165   /* Get iscol_d */
3166   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3167   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3168   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3169 
3170   /* Get isrow_d */
3171   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3172   rstart = mat->rmap->rstart;
3173   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3174   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3175   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3176   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3177 
3178   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3179   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3180   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3181 
3182   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3183   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3184   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3185 
3186   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3187 
3188   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3189   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3190 
3191   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3192   /* off-process column indices */
3193   count = 0;
3194   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3195   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3196 
3197   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3198   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3199   for (i=0; i<Bn; i++) {
3200     if (PetscRealPart(xarray[i]) > -1.0) {
3201       idx[count]     = i;                   /* local column index in off-diagonal part B */
3202       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3203       count++;
3204     }
3205   }
3206   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3207   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3208 
3209   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3210   /* cannot ensure iscol_o has same blocksize as iscol! */
3211 
3212   ierr = PetscFree(idx);CHKERRQ(ierr);
3213   *garray = cmap1;
3214 
3215   ierr = VecDestroy(&x);CHKERRQ(ierr);
3216   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3217   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3222 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3223 {
3224   PetscErrorCode ierr;
3225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3226   Mat            M = NULL;
3227   MPI_Comm       comm;
3228   IS             iscol_d,isrow_d,iscol_o;
3229   Mat            Asub = NULL,Bsub = NULL;
3230   PetscInt       n;
3231 
3232   PetscFunctionBegin;
3233   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3234 
3235   if (call == MAT_REUSE_MATRIX) {
3236     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3237     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3238     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3239 
3240     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3241     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3242 
3243     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3244     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3245 
3246     /* Update diagonal and off-diagonal portions of submat */
3247     asub = (Mat_MPIAIJ*)(*submat)->data;
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3249     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3250     if (n) {
3251       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3252     }
3253     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3255 
3256   } else { /* call == MAT_INITIAL_MATRIX */
3257     const PetscInt *garray;
3258     PetscInt        BsubN;
3259 
3260     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3261     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3262 
3263     /* Create local submatrices Asub and Bsub */
3264     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3265     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3266 
3267     /* Create submatrix M */
3268     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3269 
3270     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3271     asub = (Mat_MPIAIJ*)M->data;
3272 
3273     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3274     n = asub->B->cmap->N;
3275     if (BsubN > n) {
3276       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3277       const PetscInt *idx;
3278       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3279       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3280 
3281       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3282       j = 0;
3283       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3284       for (i=0; i<n; i++) {
3285         if (j >= BsubN) break;
3286         while (subgarray[i] > garray[j]) j++;
3287 
3288         if (subgarray[i] == garray[j]) {
3289           idx_new[i] = idx[j++];
3290         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3291       }
3292       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3293 
3294       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3295       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3296 
3297     } else if (BsubN < n) {
3298       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3299     }
3300 
3301     ierr = PetscFree(garray);CHKERRQ(ierr);
3302     *submat = M;
3303 
3304     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3305     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3306     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3307 
3308     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3309     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3310 
3311     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3312     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3313   }
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3318 {
3319   PetscErrorCode ierr;
3320   IS             iscol_local=NULL,isrow_d;
3321   PetscInt       csize;
3322   PetscInt       n,i,j,start,end;
3323   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3324   MPI_Comm       comm;
3325 
3326   PetscFunctionBegin;
3327   /* If isrow has same processor distribution as mat,
3328      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
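  /* Summary of the paths taken below:
       (1) isrow and iscol both match mat's row/column ownership -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
       (2) only isrow matches mat's row ownership (and iscol_local is sorted) -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
       (3) otherwise gather iscol onto every process and use MatCreateSubMatrix_MPIAIJ_nonscalable() */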
3329   if (call == MAT_REUSE_MATRIX) {
3330     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3331     if (isrow_d) {
3332       sameRowDist  = PETSC_TRUE;
3333       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3334     } else {
3335       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3336       if (iscol_local) {
3337         sameRowDist  = PETSC_TRUE;
3338         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3339       }
3340     }
3341   } else {
3342     /* Check if isrow has same processor distribution as mat */
3343     sameDist[0] = PETSC_FALSE;
3344     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3345     if (!n) {
3346       sameDist[0] = PETSC_TRUE;
3347     } else {
3348       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3349       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3350       if (i >= start && j < end) {
3351         sameDist[0] = PETSC_TRUE;
3352       }
3353     }
3354 
3355     /* Check if iscol has same processor distribution as mat */
3356     sameDist[1] = PETSC_FALSE;
3357     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3358     if (!n) {
3359       sameDist[1] = PETSC_TRUE;
3360     } else {
3361       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3362       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3363       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3364     }
3365 
3366     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3367     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3368     sameRowDist = tsameDist[0];
3369   }
3370 
3371   if (sameRowDist) {
3372     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3373       /* isrow and iscol have same processor distribution as mat */
3374       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3375       PetscFunctionReturn(0);
3376     } else { /* sameRowDist */
3377       /* isrow has same processor distribution as mat */
3378       if (call == MAT_INITIAL_MATRIX) {
3379         PetscBool sorted;
3380         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3381         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3382         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3383         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3384 
3385         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3386         if (sorted) {
3387           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3388           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3389           PetscFunctionReturn(0);
3390         }
3391       } else { /* call == MAT_REUSE_MATRIX */
3392         IS iscol_sub;
3393         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3394         if (iscol_sub) {
3395           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3396           PetscFunctionReturn(0);
3397         }
3398       }
3399     }
3400   }
3401 
3402   /* General case: iscol -> iscol_local which has global size of iscol */
3403   if (call == MAT_REUSE_MATRIX) {
3404     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3405     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3406   } else {
3407     if (!iscol_local) {
3408       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3409     }
3410   }
3411 
3412   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3413   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3414 
3415   if (call == MAT_INITIAL_MATRIX) {
3416     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3417     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3418   }
3419   PetscFunctionReturn(0);
3420 }
3421 
3422 /*@C
3423      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3424          and "off-diagonal" part of the matrix in CSR format.
3425 
3426    Collective
3427 
3428    Input Parameters:
3429 +  comm - MPI communicator
3430 .  A - "diagonal" portion of matrix
3431 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3432 -  garray - global index of B columns
3433 
3434    Output Parameter:
3435 .   mat - the matrix, with input A as its local diagonal matrix

3436    Level: advanced
3437 
3438    Notes:
3439        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3440        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3441 
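   Example usage (an illustrative sketch; Ad and Ao are assumed to be sequential AIJ matrices with the same
   number of local rows, and g[] holds the global column numbers of Ao's columns):
.vb
   Mat C;
   MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,g,&C);
   MatDestroy(&C);
.ve
   After the call, Ad belongs to C and Ao has been destroyed; neither may be used by the caller again.
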
3442 .seealso: MatCreateMPIAIJWithSplitArrays()
3443 @*/
3444 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3445 {
3446   PetscErrorCode    ierr;
3447   Mat_MPIAIJ        *maij;
3448   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3449   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3450   const PetscScalar *oa;
3451   Mat               Bnew;
3452   PetscInt          m,n,N;
3453 
3454   PetscFunctionBegin;
3455   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3456   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3457   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3458   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3459   /* The check below is removed because, when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3460   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3461 
3462   /* Get global columns of mat */
3463   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3464 
3465   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3466   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3467   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3468   maij = (Mat_MPIAIJ*)(*mat)->data;
3469 
3470   (*mat)->preallocated = PETSC_TRUE;
3471 
3472   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3473   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3474 
3475   /* Set A as diagonal portion of *mat */
3476   maij->A = A;
3477 
3478   nz = oi[m];
3479   for (i=0; i<nz; i++) {
3480     col   = oj[i];
3481     oj[i] = garray[col];
3482   }
3483 
3484   /* Set Bnew as off-diagonal portion of *mat */
3485   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3486   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3487   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3488   bnew        = (Mat_SeqAIJ*)Bnew->data;
3489   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3490   maij->B     = Bnew;
3491 
3492   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3493 
3494   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3495   b->free_a       = PETSC_FALSE;
3496   b->free_ij      = PETSC_FALSE;
3497   ierr = MatDestroy(&B);CHKERRQ(ierr);
3498 
3499   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3500   bnew->free_a       = PETSC_TRUE;
3501   bnew->free_ij      = PETSC_TRUE;
3502 
3503   /* condense columns of maij->B */
3504   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3505   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3507   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3508   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3509   PetscFunctionReturn(0);
3510 }
3511 
3512 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3513 
3514 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3515 {
3516   PetscErrorCode ierr;
3517   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3518   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3519   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3520   Mat            M,Msub,B=a->B;
3521   MatScalar      *aa;
3522   Mat_SeqAIJ     *aij;
3523   PetscInt       *garray = a->garray,*colsub,Ncols;
3524   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3525   IS             iscol_sub,iscmap;
3526   const PetscInt *is_idx,*cmap;
3527   PetscBool      allcolumns=PETSC_FALSE;
3528   MPI_Comm       comm;
3529 
3530   PetscFunctionBegin;
3531   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3532   if (call == MAT_REUSE_MATRIX) {
3533     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3534     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3535     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3536 
3537     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3538     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3539 
3540     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3541     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3542 
3543     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3544 
3545   } else { /* call == MAT_INITIAL_MATRIX */
3546     PetscBool flg;
3547 
3548     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3549     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3550 
3551     /* (1) iscol -> nonscalable iscol_local */
3552     /* Check for special case: each processor gets entire matrix columns */
3553     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3554     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3555     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3556     if (allcolumns) {
3557       iscol_sub = iscol_local;
3558       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3559       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3560 
3561     } else {
3562       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3563       PetscInt *idx,*cmap1,k;
3564       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3565       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3566       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3567       count = 0;
3568       k     = 0;
3569       for (i=0; i<Ncols; i++) {
3570         j = is_idx[i];
3571         if (j >= cstart && j < cend) {
3572           /* diagonal part of mat */
3573           idx[count]     = j;
3574           cmap1[count++] = i; /* column index in submat */
3575         } else if (Bn) {
3576           /* off-diagonal part of mat */
3577           if (j == garray[k]) {
3578             idx[count]     = j;
3579             cmap1[count++] = i;  /* column index in submat */
3580           } else if (j > garray[k]) {
3581             while (j > garray[k] && k < Bn-1) k++;
3582             if (j == garray[k]) {
3583               idx[count]     = j;
3584               cmap1[count++] = i; /* column index in submat */
3585             }
3586           }
3587         }
3588       }
3589       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3590 
3591       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3592       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3593       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3594 
3595       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3596     }
3597 
3598     /* (3) Create sequential Msub */
3599     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3600   }
3601 
3602   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3603   aij  = (Mat_SeqAIJ*)(Msub)->data;
3604   ii   = aij->i;
3605   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3606 
3607   /*
3608       m - number of local rows
3609       Ncols - number of columns (same on all processors)
3610       rstart - first row in new global matrix generated
3611   */
3612   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3613 
3614   if (call == MAT_INITIAL_MATRIX) {
3615     /* (4) Create parallel newmat */
3616     PetscMPIInt    rank,size;
3617     PetscInt       csize;
3618 
3619     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3620     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3621 
3622     /*
3623         Determine the number of non-zeros in the diagonal and off-diagonal
3624         portions of the matrix in order to do correct preallocation
3625     */
3626 
3627     /* first get start and end of "diagonal" columns */
3628     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3629     if (csize == PETSC_DECIDE) {
3630       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3631       if (mglobal == Ncols) { /* square matrix */
3632         nlocal = m;
3633       } else {
3634         nlocal = Ncols/size + ((Ncols % size) > rank);
3635       }
3636     } else {
3637       nlocal = csize;
3638     }
3639     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3640     rstart = rend - nlocal;
3641     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3642 
3643     /* next, compute all the lengths */
3644     jj    = aij->j;
3645     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3646     olens = dlens + m;
3647     for (i=0; i<m; i++) {
3648       jend = ii[i+1] - ii[i];
3649       olen = 0;
3650       dlen = 0;
3651       for (j=0; j<jend; j++) {
3652         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3653         else dlen++;
3654         jj++;
3655       }
3656       olens[i] = olen;
3657       dlens[i] = dlen;
3658     }
3659 
3660     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3661     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3662 
3663     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3664     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3665     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3666     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3667     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3668     ierr = PetscFree(dlens);CHKERRQ(ierr);
3669 
3670   } else { /* call == MAT_REUSE_MATRIX */
3671     M    = *newmat;
3672     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3673     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3674     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3675     /*
3676          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3677        rather than the slower MatSetValues().
3678     */
3679     M->was_assembled = PETSC_TRUE;
3680     M->assembled     = PETSC_FALSE;
3681   }
3682 
3683   /* (5) Set values of Msub to *newmat */
3684   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3685   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3686 
3687   jj   = aij->j;
3688   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3689   for (i=0; i<m; i++) {
3690     row = rstart + i;
3691     nz  = ii[i+1] - ii[i];
3692     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3693     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3694     jj += nz; aa += nz;
3695   }
3696   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3697   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3698 
3699   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3700   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701 
3702   ierr = PetscFree(colsub);CHKERRQ(ierr);
3703 
3704   /* save Msub, iscol_sub and iscmap used in processor for next request */
3705   if (call == MAT_INITIAL_MATRIX) {
3706     *newmat = M;
3707     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3708     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3709 
3710     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3711     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3712 
3713     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3714     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3715 
3716     if (iscol_local) {
3717       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3718       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3719     }
3720   }
3721   PetscFunctionReturn(0);
3722 }
3723 
3724 /*
3725     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3726   process, and then the end result by concatenating the local matrices.
3727   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3728 
3729   Note: This requires a sequential iscol with all indices.
3730 */
3731 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3732 {
3733   PetscErrorCode ierr;
3734   PetscMPIInt    rank,size;
3735   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3736   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3737   Mat            M,Mreuse;
3738   MatScalar      *aa,*vwork;
3739   MPI_Comm       comm;
3740   Mat_SeqAIJ     *aij;
3741   PetscBool      colflag,allcolumns=PETSC_FALSE;
3742 
3743   PetscFunctionBegin;
3744   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3745   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3746   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3747 
3748   /* Check for special case: each processor gets entire matrix columns */
3749   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3750   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3751   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3752   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3753 
3754   if (call ==  MAT_REUSE_MATRIX) {
3755     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3756     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3757     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3758   } else {
3759     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3760   }
3761 
3762   /*
3763       m - number of local rows
3764       n - number of columns (same on all processors)
3765       rstart - first row in new global matrix generated
3766   */
3767   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3768   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3769   if (call == MAT_INITIAL_MATRIX) {
3770     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3771     ii  = aij->i;
3772     jj  = aij->j;
3773 
3774     /*
3775         Determine the number of non-zeros in the diagonal and off-diagonal
3776         portions of the matrix in order to do correct preallocation
3777     */
3778 
3779     /* first get start and end of "diagonal" columns */
3780     if (csize == PETSC_DECIDE) {
3781       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3782       if (mglobal == n) { /* square matrix */
3783         nlocal = m;
3784       } else {
3785         nlocal = n/size + ((n % size) > rank);
3786       }
3787     } else {
3788       nlocal = csize;
3789     }
3790     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3791     rstart = rend - nlocal;
3792     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3793 
3794     /* next, compute all the lengths */
3795     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3796     olens = dlens + m;
3797     for (i=0; i<m; i++) {
3798       jend = ii[i+1] - ii[i];
3799       olen = 0;
3800       dlen = 0;
3801       for (j=0; j<jend; j++) {
3802         if (*jj < rstart || *jj >= rend) olen++;
3803         else dlen++;
3804         jj++;
3805       }
3806       olens[i] = olen;
3807       dlens[i] = dlen;
3808     }
3809     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3810     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3811     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3812     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3813     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3814     ierr = PetscFree(dlens);CHKERRQ(ierr);
3815   } else {
3816     PetscInt ml,nl;
3817 
3818     M    = *newmat;
3819     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3820     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3821     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3822     /*
3823          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3824        rather than the slower MatSetValues().
3825     */
3826     M->was_assembled = PETSC_TRUE;
3827     M->assembled     = PETSC_FALSE;
3828   }
3829   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3830   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3831   ii   = aij->i;
3832   jj   = aij->j;
3833 
3834   /* trigger copy to CPU if needed */
3835   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3836   for (i=0; i<m; i++) {
3837     row   = rstart + i;
3838     nz    = ii[i+1] - ii[i];
3839     cwork = jj; jj += nz;
3840     vwork = aa; aa += nz;
3841     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3842   }
3843   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3844 
3845   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3847   *newmat = M;
3848 
3849   /* save submatrix used in processor for next request */
3850   if (call ==  MAT_INITIAL_MATRIX) {
3851     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3852     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3853   }
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3858 {
3859   PetscInt       m,cstart, cend,j,nnz,i,d;
3860   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3861   const PetscInt *JJ;
3862   PetscErrorCode ierr;
3863   PetscBool      nooffprocentries;
3864 
3865   PetscFunctionBegin;
3866   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3867 
3868   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3869   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3870   m      = B->rmap->n;
3871   cstart = B->cmap->rstart;
3872   cend   = B->cmap->rend;
3873   rstart = B->rmap->rstart;
3874 
3875   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3876 
3877   if (PetscDefined(USE_DEBUG)) {
3878     for (i=0; i<m; i++) {
3879       nnz = Ii[i+1]- Ii[i];
3880       JJ  = J + Ii[i];
3881       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3882       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index %D",i,JJ[0]);
3883       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3884     }
3885   }
3886 
3887   for (i=0; i<m; i++) {
3888     nnz     = Ii[i+1]- Ii[i];
3889     JJ      = J + Ii[i];
3890     nnz_max = PetscMax(nnz_max,nnz);
3891     d       = 0;
3892     for (j=0; j<nnz; j++) {
3893       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3894     }
3895     d_nnz[i] = d;
3896     o_nnz[i] = nnz - d;
3897   }
3898   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3899   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3900 
3901   for (i=0; i<m; i++) {
3902     ii   = i + rstart;
3903     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3904   }
3905   nooffprocentries    = B->nooffprocentries;
3906   B->nooffprocentries = PETSC_TRUE;
3907   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3909   B->nooffprocentries = nooffprocentries;
3910 
3911   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 /*@
3916    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3917    (the default parallel PETSc format).
3918 
3919    Collective
3920 
3921    Input Parameters:
3922 +  B - the matrix
3923 .  i - the indices into j for the start of each local row (starts with zero)
3924 .  j - the column indices for each local row (starts with zero)
3925 -  v - optional values in the matrix
3926 
3927    Level: developer
3928 
3929    Notes:
3930        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3931      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3932      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3933 
3934        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3935 
3936        The format used for the sparse matrix input is equivalent to a
3937     row-major ordering, i.e., for the following matrix, the input data expected is
3938     as shown:
3939 
3940 $        1 0 0
3941 $        2 0 3     P0
3942 $       -------
3943 $        4 5 6     P1
3944 $
3945 $     Process0 [P0]: rows_owned=[0,1]
3946 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3947 $        j =  {0,0,2}  [size = 3]
3948 $        v =  {1,2,3}  [size = 3]
3949 $
3950 $     Process1 [P1]: rows_owned=[2]
3951 $        i =  {0,3}    [size = nrow+1  = 1+1]
3952 $        j =  {0,1,2}  [size = 3]
3953 $        v =  {4,5,6}  [size = 3]
3954 
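     A minimal calling sketch for this kind of data (assuming the communicator, the local and
     global sizes, and the CSR arrays i, j, v above have already been set up; variable names
     here are illustrative only):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve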
3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3956           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3957 @*/
3958 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3959 {
3960   PetscErrorCode ierr;
3961 
3962   PetscFunctionBegin;
3963   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3964   PetscFunctionReturn(0);
3965 }
3966 
3967 /*@C
3968    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3969    (the default parallel PETSc format).  For good matrix assembly performance
3970    the user should preallocate the matrix storage by setting the parameters
3971    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3972    performance can be increased by more than a factor of 50.
3973 
3974    Collective
3975 
3976    Input Parameters:
3977 +  B - the matrix
3978 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3979            (same value is used for all local rows)
3980 .  d_nnz - array containing the number of nonzeros in the various rows of the
3981            DIAGONAL portion of the local submatrix (possibly different for each row)
3982            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3983            The size of this array is equal to the number of local rows, i.e 'm'.
3984            For matrices that will be factored, you must leave room for (and set)
3985            the diagonal entry even if it is zero.
3986 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3987            submatrix (same value is used for all local rows).
3988 -  o_nnz - array containing the number of nonzeros in the various rows of the
3989            OFF-DIAGONAL portion of the local submatrix (possibly different for
3990            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3991            structure. The size of this array is equal to the number
3992            of local rows, i.e 'm'.
3993 
3994    If the *_nnz parameter is given then the *_nz parameter is ignored
3995 
3996    The AIJ format (also called the Yale sparse matrix format or
3997    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3998    storage.  The stored row and column indices begin with zero.
3999    See Users-Manual: ch_mat for details.
4000 
4001    The parallel matrix is partitioned such that the first m0 rows belong to
4002    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4003    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
4004 
4005    The DIAGONAL portion of the local submatrix of a processor can be defined
4006    as the submatrix obtained by extracting the part corresponding to
4007    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4008    first row that belongs to the processor, r2 is the last row belonging to
4009    this processor, and c1-c2 is the range of indices of the local part of a
4010    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4011    common case of a square matrix, the row and column ranges are the same and
4012    the DIAGONAL part is also square. The remaining portion of the local
4013    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4014 
4015    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4016 
4017    You can call MatGetInfo() to get information on how effective the preallocation was;
4018    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4019    You can also run with the option -info and look for messages with the string
4020    malloc in them to see if additional memory allocation was needed.
4021 
4022    Example usage:
4023 
4024    Consider the following 8x8 matrix with 34 non-zero values, that is
4025    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4026    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4027    as follows:
4028 
4029 .vb
4030             1  2  0  |  0  3  0  |  0  4
4031     Proc0   0  5  6  |  7  0  0  |  8  0
4032             9  0 10  | 11  0  0  | 12  0
4033     -------------------------------------
4034            13  0 14  | 15 16 17  |  0  0
4035     Proc1   0 18  0  | 19 20 21  |  0  0
4036             0  0  0  | 22 23  0  | 24  0
4037     -------------------------------------
4038     Proc2  25 26 27  |  0  0 28  | 29  0
4039            30  0  0  | 31 32 33  |  0 34
4040 .ve
4041 
4042    This can be represented as a collection of submatrices as:
4043 
4044 .vb
4045       A B C
4046       D E F
4047       G H I
4048 .ve
4049 
4050    Where the submatrices A,B,C are owned by proc0, D,E,F are
4051    owned by proc1, G,H,I are owned by proc2.
4052 
4053    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4055    The 'M','N' parameters are 8,8, and have the same values on all procs.
4056 
4057    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4058    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4059    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4060    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4061    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4062    matrix, and [DF] as another SeqAIJ matrix.
4063 
4064    When d_nz, o_nz parameters are specified, d_nz storage elements are
4065    allocated for every row of the local diagonal submatrix, and o_nz
4066    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4067    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4068    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4069    In this case, the values of d_nz,o_nz are:
4070 .vb
4071      proc0 : dnz = 2, o_nz = 2
4072      proc1 : dnz = 3, o_nz = 2
4073      proc2 : dnz = 1, o_nz = 4
4074 .ve
4075    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4076    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4077    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4078    34 values.
4079 
4080    When d_nnz, o_nnz parameters are specified, the storage is specified
4081    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4082    In the above case the values for d_nnz,o_nnz are:
4083 .vb
4084      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4085      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4086      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4087 .ve
4088    Here the space allocated is sum of all the above values i.e 34, and
4089    hence pre-allocation is perfect.
4090 
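   For instance, the proc0 preallocation above could be set with a sketch like the
   following (array values taken from the example; the matrix name B is illustrative only):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve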
4091    Level: intermediate
4092 
4093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4094           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4095 @*/
4096 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4097 {
4098   PetscErrorCode ierr;
4099 
4100   PetscFunctionBegin;
4101   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4102   PetscValidType(B,1);
4103   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4104   PetscFunctionReturn(0);
4105 }
4106 
4107 /*@
4108      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in
4109          standard CSR format.
4110 
4111    Collective
4112 
4113    Input Parameters:
4114 +  comm - MPI communicator
4115 .  m - number of local rows (Cannot be PETSC_DECIDE)
4116 .  n - This value should be the same as the local size used in creating the
4117        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4118        calculated if N is given) For square matrices n is almost always m.
4119 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4120 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4121 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4122 .   j - column indices
4123 -   a - matrix values
4124 
4125    Output Parameter:
4126 .   mat - the matrix
4127 
4128    Level: intermediate
4129 
4130    Notes:
4131        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4132      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4133      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4134 
4135        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4136 
4137        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4138 
4139        The format used for the sparse matrix input is equivalent to a
4140     row-major ordering, i.e., for the following matrix, the input data expected is
4141     as shown:
4142 
4143 $        1 0 0
4144 $        2 0 3     P0
4145 $       -------
4146 $        4 5 6     P1
4147 $
4148 $     Process0 [P0]: rows_owned=[0,1]
4149 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4150 $        j =  {0,0,2}  [size = 3]
4151 $        v =  {1,2,3}  [size = 3]
4152 $
4153 $     Process1 [P1]: rows_owned=[2]
4154 $        i =  {0,3}    [size = nrow+1  = 1+1]
4155 $        j =  {0,1,2}  [size = 3]
4156 $        v =  {4,5,6}  [size = 3]
4157 
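     A minimal calling sketch for the data above (assuming the CSR arrays i, j, v for the
     local rows have already been filled; variable names are illustrative only):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&A);
.ve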
4158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4159           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4160 @*/
4161 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4162 {
4163   PetscErrorCode ierr;
4164 
4165   PetscFunctionBegin;
4166   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4167   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4168   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4169   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4170   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4171   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4172   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4173   PetscFunctionReturn(0);
4174 }
4175 
4176 /*@
4177      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in
4178          standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created.
4179 
4180    Collective
4181 
4182    Input Parameters:
4183 +  mat - the matrix
4184 .  m - number of local rows (Cannot be PETSC_DECIDE)
4185 .  n - This value should be the same as the local size used in creating the
4186        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4187        calculated if N is given) For square matrices n is almost always m.
4188 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4189 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4190 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4191 .  J - column indices
4192 -  v - matrix values
4193 
4194    Level: intermediate
4195 
4196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
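   Notes:
     A minimal calling sketch, assuming the matrix was created with MatCreateMPIAIJWithArrays()
     from the CSR arrays Ii, J, v and only the entries of v have changed since then (names are
     illustrative only):

.vb
     MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,Ii,J,v);
.ve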
4197           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4198 @*/
4199 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4200 {
4201   PetscErrorCode ierr;
4202   PetscInt       cstart,nnz,i,j;
4203   PetscInt       *ld;
4204   PetscBool      nooffprocentries;
4205   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4206   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4207   PetscScalar    *ad,*ao;
4208   const PetscInt *Adi = Ad->i;
4209   PetscInt       ldi,Iii,md;
4210 
4211   PetscFunctionBegin;
4212   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4213   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4214   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4215   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4216 
4217   ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4218   ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4219   cstart = mat->cmap->rstart;
4220   if (!Aij->ld) {
4221     /* count number of entries below block diagonal */
4222     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4223     Aij->ld = ld;
4224     for (i=0; i<m; i++) {
4225       nnz  = Ii[i+1]- Ii[i];
4226       j     = 0;
4227       while (j < nnz && J[j] < cstart) {j++;}
4228       J    += nnz;
4229       ld[i] = j;
4230     }
4231   } else {
4232     ld = Aij->ld;
4233   }
4234 
4235   for (i=0; i<m; i++) {
4236     nnz  = Ii[i+1]- Ii[i];
4237     Iii  = Ii[i];
4238     ldi  = ld[i];
4239     md   = Adi[i+1]-Adi[i];
4240     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4241     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4242     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4243     ad  += md;
4244     ao  += nnz - md;
4245   }
4246   nooffprocentries      = mat->nooffprocentries;
4247   mat->nooffprocentries = PETSC_TRUE;
4248   ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4249   ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4250   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4251   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4252   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4253   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4254   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4255   mat->nooffprocentries = nooffprocentries;
4256   PetscFunctionReturn(0);
4257 }
4258 
4259 /*@C
4260    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4261    (the default parallel PETSc format).  For good matrix assembly performance
4262    the user should preallocate the matrix storage by setting the parameters
4263    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4264    performance can be increased by more than a factor of 50.
4265 
4266    Collective
4267 
4268    Input Parameters:
4269 +  comm - MPI communicator
4270 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4271            This value should be the same as the local size used in creating the
4272            y vector for the matrix-vector product y = Ax.
4273 .  n - This value should be the same as the local size used in creating the
4274        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4275        calculated if N is given) For square matrices n is almost always m.
4276 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4277 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4278 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4279            (same value is used for all local rows)
4280 .  d_nnz - array containing the number of nonzeros in the various rows of the
4281            DIAGONAL portion of the local submatrix (possibly different for each row)
4282            or NULL, if d_nz is used to specify the nonzero structure.
4283            The size of this array is equal to the number of local rows, i.e 'm'.
4284 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4285            submatrix (same value is used for all local rows).
4286 -  o_nnz - array containing the number of nonzeros in the various rows of the
4287            OFF-DIAGONAL portion of the local submatrix (possibly different for
4288            each row) or NULL, if o_nz is used to specify the nonzero
4289            structure. The size of this array is equal to the number
4290            of local rows, i.e 'm'.
4291 
4292    Output Parameter:
4293 .  A - the matrix
4294 
4295    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4296    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4297    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4298 
4299    Notes:
4300    If the *_nnz parameter is given then the *_nz parameter is ignored
4301 
4302    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4303    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4304    storage requirements for this matrix.
4305 
4306    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4307    processor then it must be used on all processors that share the object for
4308    that argument.
4309 
4310    The user MUST specify either the local or global matrix dimensions
4311    (possibly both).
4312 
4313    The parallel matrix is partitioned across processors such that the
4314    first m0 rows belong to process 0, the next m1 rows belong to
4315    process 1, the next m2 rows belong to process 2 etc., where
4316    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4317    values corresponding to an [m x N] submatrix.
4318 
4319    The columns are logically partitioned with the n0 columns belonging
4320    to 0th partition, the next n1 columns belonging to the next
4321    partition etc., where n0,n1,n2,... are the input parameter 'n'.
4322 
4323    The DIAGONAL portion of the local submatrix on any given processor
4324    is the submatrix corresponding to the rows and columns m,n
4325    owned by the given processor, i.e., the diagonal matrix on
4326    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4327    etc. The remaining portion of the local submatrix [m x (N-n)]
4328    constitute the OFF-DIAGONAL portion. The example below better
4329    illustrates this concept.
4330 
4331    For a square global matrix we define each processor's diagonal portion
4332    to be its local rows and the corresponding columns (a square submatrix);
4333    each processor's off-diagonal portion encompasses the remainder of the
4334    local matrix (a rectangular submatrix).
4335 
4336    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4337 
4338    When calling this routine with a single process communicator, a matrix of
4339    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4340    type of communicator, use the construction mechanism
4341 .vb
4342      MatCreate(...,&A);
4343      MatSetType(A,MATMPIAIJ);
4344      MatSetSizes(A, m,n,M,N);
4345      MatMPIAIJSetPreallocation(A,...);
4346 .ve
4349 
4350    By default, this format uses inodes (identical nodes) when possible.
4351    We search for consecutive rows with the same nonzero structure, thereby
4352    reusing matrix information to achieve increased efficiency.
4353 
4354    Options Database Keys:
4355 +  -mat_no_inode  - Do not use inodes
4356 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4357 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4358         See the viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4359         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4360 
4361    Example usage:
4362 
4363    Consider the following 8x8 matrix with 34 non-zero values, that is
4364    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4365    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4366    as follows
4367 
4368 .vb
4369             1  2  0  |  0  3  0  |  0  4
4370     Proc0   0  5  6  |  7  0  0  |  8  0
4371             9  0 10  | 11  0  0  | 12  0
4372     -------------------------------------
4373            13  0 14  | 15 16 17  |  0  0
4374     Proc1   0 18  0  | 19 20 21  |  0  0
4375             0  0  0  | 22 23  0  | 24  0
4376     -------------------------------------
4377     Proc2  25 26 27  |  0  0 28  | 29  0
4378            30  0  0  | 31 32 33  |  0 34
4379 .ve
4380 
4381    This can be represented as a collection of submatrices as
4382 
4383 .vb
4384       A B C
4385       D E F
4386       G H I
4387 .ve
4388 
4389    Where the submatrices A,B,C are owned by proc0, D,E,F are
4390    owned by proc1, G,H,I are owned by proc2.
4391 
4392    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4393    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4394    The 'M','N' parameters are 8,8, and have the same values on all procs.
4395 
4396    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4397    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4398    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4399    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4400    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4401    matrix, and [DF] as another SeqAIJ matrix.
4402 
4403    When d_nz, o_nz parameters are specified, d_nz storage elements are
4404    allocated for every row of the local diagonal submatrix, and o_nz
4405    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4406    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4407    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4408    In this case, the values of d_nz,o_nz are
4409 .vb
4410      proc0 : dnz = 2, o_nz = 2
4411      proc1 : dnz = 3, o_nz = 2
4412      proc2 : dnz = 1, o_nz = 4
4413 .ve
4414    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4415    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4416    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4417    34 values.
4418 
4419    When d_nnz, o_nnz parameters are specified, the storage is specified
4420    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4421    In the above case the values for d_nnz,o_nnz are
4422 .vb
4423      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4424      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4425      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4426 .ve
4427    Here the space allocated is sum of all the above values i.e 34, and
4428    hence pre-allocation is perfect.
4429 
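   A direct calling sketch for proc0 in the example above (local sizes and per-row
   counts taken from the example; the names comm and A are illustrative only):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve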
4430    Level: intermediate
4431 
4432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4433           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4434 @*/
4435 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4436 {
4437   PetscErrorCode ierr;
4438   PetscMPIInt    size;
4439 
4440   PetscFunctionBegin;
4441   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4442   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4443   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4444   if (size > 1) {
4445     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4446     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4447   } else {
4448     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4449     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4450   }
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 /*@C
4455   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4456 
4457   Not collective
4458 
4459   Input Parameter:
4460 . A - The MPIAIJ matrix
4461 
4462   Output Parameters:
4463 + Ad - The local diagonal block as a SeqAIJ matrix
4464 . Ao - The local off-diagonal block as a SeqAIJ matrix
4465 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4466 
4467   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4468   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4469   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4470   local column numbers to global column numbers in the original matrix.
4471 
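  A minimal calling sketch (assuming A is a MATMPIAIJ matrix; the global column index of
  local column jo of Ao is then colmap[jo]):

.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve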
4472   Level: intermediate
4473 
4474 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4475 @*/
4476 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4477 {
4478   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4479   PetscBool      flg;
4480   PetscErrorCode ierr;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4484   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4485   if (Ad)     *Ad     = a->A;
4486   if (Ao)     *Ao     = a->B;
4487   if (colmap) *colmap = a->garray;
4488   PetscFunctionReturn(0);
4489 }
4490 
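/* Concatenates the rows of the per-process sequential matrix inmat into a parallel matrix *outmat on comm;
   n is the number of local columns of *outmat (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the parallel matrix
   is created and preallocated from the rows of inmat (symbolic phase); with MAT_REUSE_MATRIX only the values
   are inserted (numeric phase). */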
4491 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4492 {
4493   PetscErrorCode ierr;
4494   PetscInt       m,N,i,rstart,nnz,Ii;
4495   PetscInt       *indx;
4496   PetscScalar    *values;
4497   MatType        rootType;
4498 
4499   PetscFunctionBegin;
4500   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4501   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4502     PetscInt       *dnz,*onz,sum,bs,cbs;
4503 
4504     if (n == PETSC_DECIDE) {
4505       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4506     }
4507     /* Check sum(n) = N */
4508     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4509     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4510 
4511     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4512     rstart -= m;
4513 
4514     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4515     for (i=0; i<m; i++) {
4516       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4517       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4518       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4519     }
4520 
4521     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4522     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4523     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4524     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4525     ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
4526     ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
4527     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4528     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4529     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4530     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4531   }
4532 
4533   /* numeric phase */
4534   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4535   for (i=0; i<m; i++) {
4536     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4537     Ii   = i + rstart;
4538     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4539     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4540   }
4541   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4542   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4543   PetscFunctionReturn(0);
4544 }
4545 
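/* Appends the local rows of A, stored as a sequential [m x N] matrix, to the binary file outfile.<rank>,
   one file per rank */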
4546 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4547 {
4548   PetscErrorCode    ierr;
4549   PetscMPIInt       rank;
4550   PetscInt          m,N,i,rstart,nnz;
4551   size_t            len;
4552   const PetscInt    *indx;
4553   PetscViewer       out;
4554   char              *name;
4555   Mat               B;
4556   const PetscScalar *values;
4557 
4558   PetscFunctionBegin;
4559   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4560   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4561   /* Should this be the type of the diagonal block of A? */
4562   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4563   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4564   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4565   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4566   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4567   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4568   for (i=0; i<m; i++) {
4569     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4570     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4571     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4572   }
4573   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4574   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4575 
4576   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4577   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4578   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4579   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4580   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4581   ierr = PetscFree(name);CHKERRQ(ierr);
4582   ierr = MatView(B,out);CHKERRQ(ierr);
4583   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4584   ierr = MatDestroy(&B);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4589 {
4590   PetscErrorCode      ierr;
4591   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4592 
4593   PetscFunctionBegin;
4594   if (!merge) PetscFunctionReturn(0);
4595   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4596   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4597   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4598   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4599   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4600   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4601   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4602   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4603   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4604   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4605   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4606   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4607   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4608   ierr = PetscFree(merge);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 #include <../src/mat/utils/freespace.h>
4613 #include <petscbt.h>
4614 
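/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the values of the parallel matrix mpimat (created by
   MatCreateMPIAIJSumSeqAIJSymbolic()) by summing the entries of each process's sequential matrix seqmat,
   communicating the off-process rows using the merge information attached to mpimat */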
4615 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4616 {
4617   PetscErrorCode      ierr;
4618   MPI_Comm            comm;
4619   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4620   PetscMPIInt         size,rank,taga,*len_s;
4621   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4622   PetscInt            proc,m;
4623   PetscInt            **buf_ri,**buf_rj;
4624   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4625   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4626   MPI_Request         *s_waits,*r_waits;
4627   MPI_Status          *status;
4628   const MatScalar     *aa,*a_a;
4629   MatScalar           **abuf_r,*ba_i;
4630   Mat_Merge_SeqsToMPI *merge;
4631   PetscContainer      container;
4632 
4633   PetscFunctionBegin;
4634   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4635   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4636 
4637   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4638   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4639 
4640   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4641   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4642   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4643   ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4644   aa   = a_a;
4645 
4646   bi     = merge->bi;
4647   bj     = merge->bj;
4648   buf_ri = merge->buf_ri;
4649   buf_rj = merge->buf_rj;
4650 
4651   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4652   owners = merge->rowmap->range;
4653   len_s  = merge->len_s;
4654 
4655   /* send and recv matrix values */
4656   /*-----------------------------*/
4657   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4658   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4659 
4660   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4661   for (proc=0,k=0; proc<size; proc++) {
4662     if (!len_s[proc]) continue;
4663     i    = owners[proc];
4664     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4665     k++;
4666   }
4667 
4668   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4669   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4670   ierr = PetscFree(status);CHKERRQ(ierr);
4671 
4672   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4673   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4674 
4675   /* insert mat values of mpimat */
4676   /*----------------------------*/
4677   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4678   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4679 
4680   for (k=0; k<merge->nrecv; k++) {
4681     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4682     nrows       = *(buf_ri_k[k]);
4683     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4684     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4685   }
4686 
4687   /* set values of ba */
4688   m    = merge->rowmap->n;
4689   for (i=0; i<m; i++) {
4690     arow = owners[rank] + i;
4691     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4692     bnzi = bi[i+1] - bi[i];
4693     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4694 
4695     /* add local non-zero vals of this proc's seqmat into ba */
4696     anzi   = ai[arow+1] - ai[arow];
4697     aj     = a->j + ai[arow];
4698     aa     = a_a + ai[arow];
4699     nextaj = 0;
4700     for (j=0; nextaj<anzi; j++) {
4701       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4702         ba_i[j] += aa[nextaj++];
4703       }
4704     }
4705 
4706     /* add received vals into ba */
4707     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4708       /* i-th row */
4709       if (i == *nextrow[k]) {
4710         anzi   = *(nextai[k]+1) - *nextai[k];
4711         aj     = buf_rj[k] + *(nextai[k]);
4712         aa     = abuf_r[k] + *(nextai[k]);
4713         nextaj = 0;
4714         for (j=0; nextaj<anzi; j++) {
4715           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4716             ba_i[j] += aa[nextaj++];
4717           }
4718         }
4719         nextrow[k]++; nextai[k]++;
4720       }
4721     }
4722     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4723   }
4724   ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4725   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4726   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4727 
4728   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4729   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4730   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4731   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4732   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4733   PetscFunctionReturn(0);
4734 }
4735 
4736 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4737 {
4738   PetscErrorCode      ierr;
4739   Mat                 B_mpi;
4740   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4741   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4742   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4743   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4744   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4745   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4746   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4747   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4748   MPI_Status          *status;
4749   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4750   PetscBT             lnkbt;
4751   Mat_Merge_SeqsToMPI *merge;
4752   PetscContainer      container;
4753 
4754   PetscFunctionBegin;
4755   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4756 
4757   /* make sure it is a PETSc comm */
4758   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4759   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4760   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4761 
4762   ierr = PetscNew(&merge);CHKERRQ(ierr);
4763   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4764 
4765   /* determine row ownership */
4766   /*---------------------------------------------------------*/
4767   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4768   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4769   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4770   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4771   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4772   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4773   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4774 
4775   m      = merge->rowmap->n;
4776   owners = merge->rowmap->range;
4777 
4778   /* determine the number of messages to send, their lengths */
4779   /*---------------------------------------------------------*/
4780   len_s = merge->len_s;
4781 
4782   len          = 0; /* length of buf_si[] */
4783   merge->nsend = 0;
4784   for (proc=0; proc<size; proc++) {
4785     len_si[proc] = 0;
4786     if (proc == rank) {
4787       len_s[proc] = 0;
4788     } else {
4789       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4790       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4791     }
4792     if (len_s[proc]) {
4793       merge->nsend++;
4794       nrows = 0;
4795       for (i=owners[proc]; i<owners[proc+1]; i++) {
4796         if (ai[i+1] > ai[i]) nrows++;
4797       }
4798       len_si[proc] = 2*(nrows+1);
4799       len         += len_si[proc];
4800     }
4801   }
4802 
4803   /* determine the number and length of messages to receive for ij-structure */
4804   /*-------------------------------------------------------------------------*/
4805   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4806   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4807 
4808   /* post the Irecv of j-structure */
4809   /*-------------------------------*/
4810   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4811   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4812 
4813   /* post the Isend of j-structure */
4814   /*--------------------------------*/
4815   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4816 
4817   for (proc=0, k=0; proc<size; proc++) {
4818     if (!len_s[proc]) continue;
4819     i    = owners[proc];
4820     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4821     k++;
4822   }
4823 
4824   /* receives and sends of j-structure are complete */
4825   /*------------------------------------------------*/
4826   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4827   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4828 
4829   /* send and recv i-structure */
4830   /*---------------------------*/
4831   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4832   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4833 
4834   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4835   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4836   for (proc=0,k=0; proc<size; proc++) {
4837     if (!len_s[proc]) continue;
4838     /* form outgoing message for i-structure:
4839          buf_si[0]:                 nrows to be sent
4840                [1:nrows]:           row index (global)
4841                [nrows+1:2*nrows+1]: i-structure index
4842     */
4843     /*-------------------------------------------*/
4844     nrows       = len_si[proc]/2 - 1;
4845     buf_si_i    = buf_si + nrows+1;
4846     buf_si[0]   = nrows;
4847     buf_si_i[0] = 0;
4848     nrows       = 0;
4849     for (i=owners[proc]; i<owners[proc+1]; i++) {
4850       anzi = ai[i+1] - ai[i];
4851       if (anzi) {
4852         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4853         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4854         nrows++;
4855       }
4856     }
4857     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4858     k++;
4859     buf_si += len_si[proc];
4860   }
4861 
4862   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4863   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4864 
4865   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4866   for (i=0; i<merge->nrecv; i++) {
4867     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4868   }
4869 
4870   ierr = PetscFree(len_si);CHKERRQ(ierr);
4871   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4872   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4873   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4874   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4875   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4876   ierr = PetscFree(status);CHKERRQ(ierr);
4877 
4878   /* compute a local seq matrix in each processor */
4879   /*----------------------------------------------*/
4880   /* allocate bi array and free space for accumulating nonzero column info */
4881   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4882   bi[0] = 0;
4883 
4884   /* create and initialize a linked list */
4885   nlnk = N+1;
4886   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4887 
4888   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4889   len  = ai[owners[rank+1]] - ai[owners[rank]];
4890   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4891 
4892   current_space = free_space;
4893 
4894   /* determine symbolic info for each local row */
4895   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4896 
4897   for (k=0; k<merge->nrecv; k++) {
4898     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4899     nrows       = *buf_ri_k[k];
4900     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4901     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4902   }
4903 
4904   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4905   len  = 0;
4906   for (i=0; i<m; i++) {
4907     bnzi = 0;
4908     /* add local non-zero cols of this proc's seqmat into lnk */
4909     arow  = owners[rank] + i;
4910     anzi  = ai[arow+1] - ai[arow];
4911     aj    = a->j + ai[arow];
4912     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4913     bnzi += nlnk;
4914     /* add received col data into lnk */
4915     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4916       if (i == *nextrow[k]) { /* i-th row */
4917         anzi  = *(nextai[k]+1) - *nextai[k];
4918         aj    = buf_rj[k] + *nextai[k];
4919         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4920         bnzi += nlnk;
4921         nextrow[k]++; nextai[k]++;
4922       }
4923     }
4924     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4925 
4926     /* if free space is not available, make more free space */
4927     if (current_space->local_remaining<bnzi) {
4928       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4929       nspacedouble++;
4930     }
4931     /* copy data into free space, then initialize lnk */
4932     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4933     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4934 
4935     current_space->array           += bnzi;
4936     current_space->local_used      += bnzi;
4937     current_space->local_remaining -= bnzi;
4938 
4939     bi[i+1] = bi[i] + bnzi;
4940   }
4941 
4942   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4943 
4944   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4945   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4946   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4947 
4948   /* create symbolic parallel matrix B_mpi */
4949   /*---------------------------------------*/
4950   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4951   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4952   if (n==PETSC_DECIDE) {
4953     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4954   } else {
4955     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4956   }
4957   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4958   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4959   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4960   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4961   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4962 
4963   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4964   B_mpi->assembled  = PETSC_FALSE;
4965   merge->bi         = bi;
4966   merge->bj         = bj;
4967   merge->buf_ri     = buf_ri;
4968   merge->buf_rj     = buf_rj;
4969   merge->coi        = NULL;
4970   merge->coj        = NULL;
4971   merge->owners_co  = NULL;
4972 
4973   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4974 
4975   /* attach the supporting struct to B_mpi for reuse */
4976   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4977   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4978   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4979   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4980   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4981   *mpimat = B_mpi;
4982 
4983   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4984   PetscFunctionReturn(0);
4985 }
4986 
4987 /*@C
4988       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4989                  matrices from each processor
4990 
4991     Collective
4992 
4993    Input Parameters:
4994 +    comm - the communicator the parallel matrix will live on
4995 .    seqmat - the input sequential matrix (one per process)
4996 .    m - number of local rows (or PETSC_DECIDE)
4997 .    n - number of local columns (or PETSC_DECIDE)
4998 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4999 
5000    Output Parameter:
5001 .    mpimat - the parallel matrix generated
5002 
5003     Level: advanced
5004 
5005    Notes:
5006      The dimensions of the sequential matrix on each process MUST be the same.
5007      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be
5008      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5009 @*/
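/*
   A minimal usage sketch (added for illustration, not part of the original source; Nglobal and maxnz
   are placeholder values): every rank assembles its own SeqAIJ contribution with the same global
   dimensions, and the contributions are summed into one parallel MATMPIAIJ matrix.

      Mat seqmat,mpimat;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,Nglobal,Nglobal,maxnz,NULL,&seqmat);CHKERRQ(ierr);
      ... insert values with MatSetValues() and assemble seqmat ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... later, after changing the values (but not the nonzero pattern) of seqmat ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/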
5010 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5011 {
5012   PetscErrorCode ierr;
5013   PetscMPIInt    size;
5014 
5015   PetscFunctionBegin;
5016   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5017   if (size == 1) {
5018     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5019     if (scall == MAT_INITIAL_MATRIX) {
5020       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5021     } else {
5022       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5023     }
5024     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5025     PetscFunctionReturn(0);
5026   }
5027   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5028   if (scall == MAT_INITIAL_MATRIX) {
5029     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5030   }
5031   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5032   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 /*@
5037      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5038           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5039           with MatGetSize().
5040 
5041     Not Collective
5042 
5043    Input Parameters:
5044 +    A - the matrix
5045 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5046 
5047    Output Parameter:
5048 .    A_loc - the local sequential matrix generated
5049 
5050     Level: developer
5051 
5052    Notes:
5053      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5054      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5055      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5056      modify the values of the returned A_loc.
5057 
5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5059 @*/
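/*
   A minimal usage sketch (added for illustration, not part of the original source; A is assumed to be
   an assembled MATMPIAIJ matrix): extract the local rows once, then refresh the values after A changes
   without reallocating.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use the mlocal x N sequential matrix A_loc ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/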
5060 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5061 {
5062   PetscErrorCode    ierr;
5063   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5064   Mat_SeqAIJ        *mat,*a,*b;
5065   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5066   const PetscScalar *aa,*ba,*aav,*bav;
5067   PetscScalar       *ca,*cam;
5068   PetscMPIInt       size;
5069   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5070   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5071   PetscBool         match;
5072 
5073   PetscFunctionBegin;
5074   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5075   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5076   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5077   if (size == 1) {
5078     if (scall == MAT_INITIAL_MATRIX) {
5079       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5080       *A_loc = mpimat->A;
5081     } else if (scall == MAT_REUSE_MATRIX) {
5082       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5083     }
5084     PetscFunctionReturn(0);
5085   }
5086 
5087   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5088   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5089   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5090   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5091   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5092   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5093   aa   = aav;
5094   ba   = bav;
5095   if (scall == MAT_INITIAL_MATRIX) {
5096     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5097     ci[0] = 0;
5098     for (i=0; i<am; i++) {
5099       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5100     }
5101     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5102     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5103     k    = 0;
5104     for (i=0; i<am; i++) {
5105       ncols_o = bi[i+1] - bi[i];
5106       ncols_d = ai[i+1] - ai[i];
5107       /* off-diagonal portion of A */
5108       for (jo=0; jo<ncols_o; jo++) {
5109         col = cmap[*bj];
5110         if (col >= cstart) break;
5111         cj[k]   = col; bj++;
5112         ca[k++] = *ba++;
5113       }
5114       /* diagonal portion of A */
5115       for (j=0; j<ncols_d; j++) {
5116         cj[k]   = cstart + *aj++;
5117         ca[k++] = *aa++;
5118       }
5119       /* off-diagonal portion of A */
5120       for (j=jo; j<ncols_o; j++) {
5121         cj[k]   = cmap[*bj++];
5122         ca[k++] = *ba++;
5123       }
5124     }
5125     /* put together the new matrix */
5126     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5127     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5128     /* Since these are PETSc arrays, change flags to free them as necessary. */
5129     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5130     mat->free_a  = PETSC_TRUE;
5131     mat->free_ij = PETSC_TRUE;
5132     mat->nonew   = 0;
5133   } else if (scall == MAT_REUSE_MATRIX) {
5134     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5135     ci   = mat->i;
5136     cj   = mat->j;
5137     ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5138     for (i=0; i<am; i++) {
5139       /* off-diagonal portion of A */
5140       ncols_o = bi[i+1] - bi[i];
5141       for (jo=0; jo<ncols_o; jo++) {
5142         col = cmap[*bj];
5143         if (col >= cstart) break;
5144         *cam++ = *ba++; bj++;
5145       }
5146       /* diagonal portion of A */
5147       ncols_d = ai[i+1] - ai[i];
5148       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5149       /* off-diagonal portion of A */
5150       for (j=jo; j<ncols_o; j++) {
5151         *cam++ = *ba++; bj++;
5152       }
5153     }
5154     ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5155   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5156   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5157   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5158   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5159   PetscFunctionReturn(0);
5160 }
5161 
5162 /*@
5163      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5164           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts.
5165 
5166     Not Collective
5167 
5168    Input Parameters:
5169 +    A - the matrix
5170 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5171 
5172    Output Parameters:
5173 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5174 -    A_loc - the local sequential matrix generated
5175 
5176     Level: developer
5177 
5178    Notes:
5179      This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering).
5180 
5181 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5182 
5183 @*/
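/*
   A minimal usage sketch (added for illustration, not part of the original source; A is assumed to be
   an assembled MATMPIAIJ matrix): the columns of A_loc are the local diagonal columns followed by the
   off-diagonal columns, and glob maps each of them back to a global column of A.

      Mat A_loc;
      IS  glob;
      ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
      ... column j of A_loc corresponds to the global column given by entry j of glob ...
      ierr = ISDestroy(&glob);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/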
5184 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5185 {
5186   PetscErrorCode ierr;
5187   Mat            Ao,Ad;
5188   const PetscInt *cmap;
5189   PetscMPIInt    size;
5190   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5191 
5192   PetscFunctionBegin;
5193   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5194   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5195   if (size == 1) {
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5198       *A_loc = Ad;
5199     } else if (scall == MAT_REUSE_MATRIX) {
5200       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5201     }
5202     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5203     PetscFunctionReturn(0);
5204   }
5205   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5206   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5207   if (f) {
5208     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5209   } else {
5210     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5211     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5212     Mat_SeqAIJ        *c;
5213     PetscInt          *ai = a->i, *aj = a->j;
5214     PetscInt          *bi = b->i, *bj = b->j;
5215     PetscInt          *ci,*cj;
5216     const PetscScalar *aa,*ba;
5217     PetscScalar       *ca;
5218     PetscInt          i,j,am,dn,on;
5219 
5220     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5221     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5222     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5223     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5224     if (scall == MAT_INITIAL_MATRIX) {
5225       PetscInt k;
5226       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5227       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5228       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5229       ci[0] = 0;
5230       for (i=0,k=0; i<am; i++) {
5231         const PetscInt ncols_o = bi[i+1] - bi[i];
5232         const PetscInt ncols_d = ai[i+1] - ai[i];
5233         ci[i+1] = ci[i] + ncols_o + ncols_d;
5234         /* diagonal portion of A */
5235         for (j=0; j<ncols_d; j++,k++) {
5236           cj[k] = *aj++;
5237           ca[k] = *aa++;
5238         }
5239         /* off-diagonal portion of A */
5240         for (j=0; j<ncols_o; j++,k++) {
5241           cj[k] = dn + *bj++;
5242           ca[k] = *ba++;
5243         }
5244       }
5245       /* put together the new matrix */
5246       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5247       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5248       /* Since these are PETSc arrays, change flags to free them as necessary. */
5249       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5250       c->free_a  = PETSC_TRUE;
5251       c->free_ij = PETSC_TRUE;
5252       c->nonew   = 0;
5253       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5254     } else if (scall == MAT_REUSE_MATRIX) {
5255       ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5256       for (i=0; i<am; i++) {
5257         const PetscInt ncols_d = ai[i+1] - ai[i];
5258         const PetscInt ncols_o = bi[i+1] - bi[i];
5259         /* diagonal portion of A */
5260         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5261         /* off-diagonal portion of A */
5262         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5263       }
5264       ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5265     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5266     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5267     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5268     if (glob) {
5269       PetscInt cst, *gidx;
5270 
5271       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5272       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5273       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5274       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5275       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5276     }
5277   }
5278   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5279   PetscFunctionReturn(0);
5280 }
5281 
5282 /*@C
5283      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5284 
5285     Not Collective
5286 
5287    Input Parameters:
5288 +    A - the matrix
5289 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5290 -    row, col - index sets of rows and columns to extract (or NULL)
5291 
5292    Output Parameter:
5293 .    A_loc - the local sequential matrix generated
5294 
5295     Level: developer
5296 
5297 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5298 
5299 @*/
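/*
   A minimal usage sketch (added for illustration, not part of the original source): passing NULL for
   row and col lets the routine build the index sets itself, so A_loc contains the local rows of A
   restricted to its nonzero columns.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
      ... work with the condensed sequential matrix ...
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/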
5300 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5301 {
5302   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5303   PetscErrorCode ierr;
5304   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5305   IS             isrowa,iscola;
5306   Mat            *aloc;
5307   PetscBool      match;
5308 
5309   PetscFunctionBegin;
5310   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5311   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5312   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5313   if (!row) {
5314     start = A->rmap->rstart; end = A->rmap->rend;
5315     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5316   } else {
5317     isrowa = *row;
5318   }
5319   if (!col) {
5320     start = A->cmap->rstart;
5321     cmap  = a->garray;
5322     nzA   = a->A->cmap->n;
5323     nzB   = a->B->cmap->n;
5324     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5325     ncols = 0;
5326     for (i=0; i<nzB; i++) {
5327       if (cmap[i] < start) idx[ncols++] = cmap[i];
5328       else break;
5329     }
5330     imark = i;
5331     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5332     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5333     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5334   } else {
5335     iscola = *col;
5336   }
5337   if (scall != MAT_INITIAL_MATRIX) {
5338     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5339     aloc[0] = *A_loc;
5340   }
5341   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5342   if (!col) { /* attach global id of condensed columns */
5343     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5344   }
5345   *A_loc = aloc[0];
5346   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5347   if (!row) {
5348     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5349   }
5350   if (!col) {
5351     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5352   }
5353   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5354   PetscFunctionReturn(0);
5355 }
5356 
5357 /*
5358  * Create a sequential AIJ matrix based on row indices: all the columns of a row are extracted once the row is matched.
5359  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5360  * on a global size.
5361  * */
5362 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5363 {
5364   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5365   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5366   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5367   PetscMPIInt              owner;
5368   PetscSFNode              *iremote,*oiremote;
5369   const PetscInt           *lrowindices;
5370   PetscErrorCode           ierr;
5371   PetscSF                  sf,osf;
5372   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5373   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5374   MPI_Comm                 comm;
5375   ISLocalToGlobalMapping   mapping;
5376   const PetscScalar        *pd_a,*po_a;
5377 
5378   PetscFunctionBegin;
5379   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5380   /* plocalsize is the number of roots
5381    * nrows is the number of leaves
5382    * */
5383   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5384   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5385   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5386   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5387   for (i=0;i<nrows;i++) {
5388     /* Find a remote index and an owner for a row
5389      * The row could be local or remote
5390      * */
5391     owner = 0;
5392     lidx  = 0;
5393     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5394     iremote[i].index = lidx;
5395     iremote[i].rank  = owner;
5396   }
5397   /* Create SF to communicate how many nonzero columns for each row */
5398   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5399   /* SF will figure out the number of nonzero columns for each row, and their
5400    * offsets
5401    * */
5402   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5403   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5404   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5405 
5406   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5407   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5408   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5409   roffsets[0] = 0;
5410   roffsets[1] = 0;
5411   for (i=0;i<plocalsize;i++) {
5412     /* diag */
5413     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5414     /* off diag */
5415     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5416     /* compute offsets so that we know the relative location of each row */
5417     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5418     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5419   }
5420   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5421   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5422   /* 'r' means root, and 'l' means leaf */
5423   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5424   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5425   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5426   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5427   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5428   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5429   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5430   dntotalcols = 0;
5431   ontotalcols = 0;
5432   ncol = 0;
5433   for (i=0;i<nrows;i++) {
5434     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5435     ncol = PetscMax(pnnz[i],ncol);
5436     /* diag */
5437     dntotalcols += nlcols[i*2+0];
5438     /* off diag */
5439     ontotalcols += nlcols[i*2+1];
5440   }
5441   /* We do not need to figure out the right number of columns
5442    * since all the calculations will be done by going through the raw data
5443    * */
5444   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5445   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5446   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5447   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5448   /* diag */
5449   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5450   /* off diag */
5451   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5452   /* diag */
5453   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5454   /* off diag */
5455   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5456   dntotalcols = 0;
5457   ontotalcols = 0;
5458   ntotalcols  = 0;
5459   for (i=0;i<nrows;i++) {
5460     owner = 0;
5461     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5462     /* Set iremote for diag matrix */
5463     for (j=0;j<nlcols[i*2+0];j++) {
5464       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5465       iremote[dntotalcols].rank    = owner;
5466       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5467       ilocal[dntotalcols++]        = ntotalcols++;
5468     }
5469     /* off diag */
5470     for (j=0;j<nlcols[i*2+1];j++) {
5471       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5472       oiremote[ontotalcols].rank    = owner;
5473       oilocal[ontotalcols++]        = ntotalcols++;
5474     }
5475   }
5476   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5477   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5478   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5479   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5480   /* P serves as the roots and P_oth as the leaves
5481    * Diag matrix
5482    * */
5483   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5484   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5485   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5486 
5487   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5488   /* Off diag */
5489   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5490   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5491   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5492   ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5493   ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5494   /* We operate on the matrix internal data for saving memory */
5495   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5496   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5497   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5498   /* Convert to global indices for diag matrix */
5499   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5500   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5501   /* We want P_oth to store global indices */
5502   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5503   /* Use memory scalable approach */
5504   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5505   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5506   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5507   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5508   /* Convert back to local indices */
5509   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5510   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5511   nout = 0;
5512   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5513   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5514   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5515   /* Exchange values */
5516   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5517   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5518   ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5519   ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5520   /* Stop PETSc from shrinking memory */
5521   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5522   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5523   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5524   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5525   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5526   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5527   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5528   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5529   PetscFunctionReturn(0);
5530 }
5531 
5532 /*
5533  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A.
5534  * This supports MPIAIJ and MAIJ.
5535  * */
5536 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5537 {
5538   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5539   Mat_SeqAIJ            *p_oth;
5540   IS                    rows,map;
5541   PetscHMapI            hamp;
5542   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5543   MPI_Comm              comm;
5544   PetscSF               sf,osf;
5545   PetscBool             has;
5546   PetscErrorCode        ierr;
5547 
5548   PetscFunctionBegin;
5549   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5550   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5551   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5552    *  and then create a submatrix (that often is an overlapping matrix)
5553    * */
5554   if (reuse == MAT_INITIAL_MATRIX) {
5555     /* Use a hash table to figure out unique keys */
5556     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5557     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5558     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5559     count = 0;
5560     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5561     for (i=0;i<a->B->cmap->n;i++) {
5562       key  = a->garray[i]/dof;
5563       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5564       if (!has) {
5565         mapping[i] = count;
5566         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5567       } else {
5568         /* Current 'i' has the same key as the previous step */
5569         mapping[i] = count-1;
5570       }
5571     }
5572     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5573     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5574     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5575     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5576     off = 0;
5577     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5578     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5579     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5580     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5581     /* In case the matrix was already created but the user wants to recreate it */
5582     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5583     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5584     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5585     ierr = ISDestroy(&map);CHKERRQ(ierr);
5586     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5587   } else if (reuse == MAT_REUSE_MATRIX) {
5588     /* If the matrix was already created, we simply update the values using the SF objects
5589      * that were attached to the matrix earlier.
5590      */
5591     const PetscScalar *pd_a,*po_a;
5592 
5593     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5594     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5595     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5596     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5597     /* Update values in place */
5598     ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5599     ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5600     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5601     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5602     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5603     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5604     ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5605     ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5606   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5607   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
5610 
5611 /*@C
5612     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5613 
5614     Collective on Mat
5615 
5616    Input Parameters:
5617 +    A - the first matrix in mpiaij format
5618 .    B - the second matrix in mpiaij format
5619 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5620 
5621    Input/Output Parameters:
5622 +    rowb - index sets of rows of B to extract (or NULL), modified on output
5623 -    colb - index sets of columns of B to extract (or NULL), modified on output
5624 
5625    Output Parameter:
5626 .    B_seq - the sequential matrix generated
5627 
5628     Level: developer
5629 
5630 @*/
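/*
   A minimal usage sketch (added for illustration, not part of the original source; A and B are assumed
   to be compatible MATMPIAIJ matrices): keep the row/column index sets so that a later call can reuse
   the nonzero pattern.

      Mat B_seq;
      IS  rowb = NULL,colb = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... after the values (but not the pattern) of B change ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/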
5631 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5632 {
5633   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5634   PetscErrorCode ierr;
5635   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5636   IS             isrowb,iscolb;
5637   Mat            *bseq=NULL;
5638 
5639   PetscFunctionBegin;
5640   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5641     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5642   }
5643   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5644 
5645   if (scall == MAT_INITIAL_MATRIX) {
5646     start = A->cmap->rstart;
5647     cmap  = a->garray;
5648     nzA   = a->A->cmap->n;
5649     nzB   = a->B->cmap->n;
5650     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5651     ncols = 0;
5652     for (i=0; i<nzB; i++) {  /* row < local row index */
5653       if (cmap[i] < start) idx[ncols++] = cmap[i];
5654       else break;
5655     }
5656     imark = i;
5657     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5658     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5659     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5660     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5661   } else {
5662     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5663     isrowb  = *rowb; iscolb = *colb;
5664     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5665     bseq[0] = *B_seq;
5666   }
5667   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5668   *B_seq = bseq[0];
5669   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5670   if (!rowb) {
5671     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5672   } else {
5673     *rowb = isrowb;
5674   }
5675   if (!colb) {
5676     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5677   } else {
5678     *colb = iscolb;
5679   }
5680   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 /*
5685     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5686     of the OFF-DIAGONAL portion of the local A
5687 
5688     Collective on Mat
5689 
5690    Input Parameters:
5691 +    A,B - the matrices in mpiaij format
5692 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5693 
5694    Output Parameters:
5695 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5696 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5697 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5698 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5699 
5700     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5701      for this matrix. This is not desirable.
5702 
5703     Level: developer
5704 
5705 */
5706 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5707 {
5708   PetscErrorCode         ierr;
5709   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5710   Mat_SeqAIJ             *b_oth;
5711   VecScatter             ctx;
5712   MPI_Comm               comm;
5713   const PetscMPIInt      *rprocs,*sprocs;
5714   const PetscInt         *srow,*rstarts,*sstarts;
5715   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5716   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5717   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5718   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5719   PetscMPIInt            size,tag,rank,nreqs;
5720 
5721   PetscFunctionBegin;
5722   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5723   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5724 
5725   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5726     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5727   }
5728   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5729   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5730 
5731   if (size == 1) {
5732     startsj_s = NULL;
5733     bufa_ptr  = NULL;
5734     *B_oth    = NULL;
5735     PetscFunctionReturn(0);
5736   }
5737 
5738   ctx = a->Mvctx;
5739   tag = ((PetscObject)ctx)->tag;
5740 
5741   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5742   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5743   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5744   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5745   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5746   rwaits = reqs;
5747   swaits = reqs + nrecvs;
5748 
5749   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5750   if (scall == MAT_INITIAL_MATRIX) {
5751     /* i-array */
5752     /*---------*/
5753     /*  post receives */
5754     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5755     for (i=0; i<nrecvs; i++) {
5756       rowlen = rvalues + rstarts[i]*rbs;
5757       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5758       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5759     }
5760 
5761     /* pack the outgoing message */
5762     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5763 
5764     sstartsj[0] = 0;
5765     rstartsj[0] = 0;
5766     len         = 0; /* total length of j or a array to be sent */
5767     if (nsends) {
5768       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5769       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5770     }
5771     for (i=0; i<nsends; i++) {
5772       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5773       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5774       for (j=0; j<nrows; j++) {
5775         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5776         for (l=0; l<sbs; l++) {
5777           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5778 
5779           rowlen[j*sbs+l] = ncols;
5780 
5781           len += ncols;
5782           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5783         }
5784         k++;
5785       }
5786       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5787 
5788       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5789     }
5790     /* recvs and sends of i-array are completed */
5791     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5792     ierr = PetscFree(svalues);CHKERRQ(ierr);
5793 
5794     /* allocate buffers for sending j and a arrays */
5795     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5796     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5797 
5798     /* create i-array of B_oth */
5799     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5800 
5801     b_othi[0] = 0;
5802     len       = 0; /* total length of j or a array to be received */
5803     k         = 0;
5804     for (i=0; i<nrecvs; i++) {
5805       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5806       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5807       for (j=0; j<nrows; j++) {
5808         b_othi[k+1] = b_othi[k] + rowlen[j];
5809         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5810         k++;
5811       }
5812       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5813     }
5814     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5815 
5816     /* allocate space for the j and a arrays of B_oth */
5817     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5818     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5819 
5820     /* j-array */
5821     /*---------*/
5822     /*  post receives of j-array */
5823     for (i=0; i<nrecvs; i++) {
5824       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5825       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5826     }
5827 
5828     /* pack the outgoing message j-array */
5829     if (nsends) k = sstarts[0];
5830     for (i=0; i<nsends; i++) {
5831       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5832       bufJ  = bufj+sstartsj[i];
5833       for (j=0; j<nrows; j++) {
5834         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5835         for (ll=0; ll<sbs; ll++) {
5836           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5837           for (l=0; l<ncols; l++) {
5838             *bufJ++ = cols[l];
5839           }
5840           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5841         }
5842       }
5843       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5844     }
5845 
5846     /* recvs and sends of j-array are completed */
5847     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5848   } else if (scall == MAT_REUSE_MATRIX) {
5849     sstartsj = *startsj_s;
5850     rstartsj = *startsj_r;
5851     bufa     = *bufa_ptr;
5852     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5853     ierr     = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5854   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
5855 
5856   /* a-array */
5857   /*---------*/
5858   /*  post receives of a-array */
5859   for (i=0; i<nrecvs; i++) {
5860     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5861     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5862   }
5863 
5864   /* pack the outgoing message a-array */
5865   if (nsends) k = sstarts[0];
5866   for (i=0; i<nsends; i++) {
5867     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5868     bufA  = bufa+sstartsj[i];
5869     for (j=0; j<nrows; j++) {
5870       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5871       for (ll=0; ll<sbs; ll++) {
5872         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5873         for (l=0; l<ncols; l++) {
5874           *bufA++ = vals[l];
5875         }
5876         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5877       }
5878     }
5879     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5880   }
5881   /* recvs and sends of a-array are completed */
5882   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5883   ierr = PetscFree(reqs);CHKERRQ(ierr);
5884 
5885   if (scall == MAT_INITIAL_MATRIX) {
5886     /* put together the new matrix */
5887     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5888 
5889     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5890     /* Since these are PETSc arrays, change flags to free them as necessary. */
5891     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5892     b_oth->free_a  = PETSC_TRUE;
5893     b_oth->free_ij = PETSC_TRUE;
5894     b_oth->nonew   = 0;
5895 
5896     ierr = PetscFree(bufj);CHKERRQ(ierr);
5897     if (!startsj_s || !bufa_ptr) {
5898       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5899       ierr = PetscFree(bufa);CHKERRQ(ierr);
5900     } else {
5901       *startsj_s = sstartsj;
5902       *startsj_r = rstartsj;
5903       *bufa_ptr  = bufa;
5904     }
5905   } else if (scall == MAT_REUSE_MATRIX) {
5906     ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5907   }
5908 
5909   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5910   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5911   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5912   PetscFunctionReturn(0);
5913 }
5914 
5915 /*@C
5916   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5917 
5918   Not Collective
5919 
5920   Input Parameter:
5921 . A - The matrix in mpiaij format
5922 
5923   Output Parameters:
5924 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5925 . colmap - A map from global column index to local index into lvec
5926 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5927 
5928   Level: developer
5929 
5930 @*/
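/*
   A minimal usage sketch (added for illustration, not part of the original source): the returned
   objects are owned by the matrix, so the caller must not destroy them.

      Vec        lvec;
      VecScatter Mvctx;
      PetscTable colmap;     (use PetscInt *colmap when PETSC_USE_CTABLE is not defined)
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/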
5931 #if defined(PETSC_USE_CTABLE)
5932 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5933 #else
5934 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5935 #endif
5936 {
5937   Mat_MPIAIJ *a;
5938 
5939   PetscFunctionBegin;
5940   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5941   PetscValidPointer(lvec, 2);
5942   PetscValidPointer(colmap, 3);
5943   PetscValidPointer(multScatter, 4);
5944   a = (Mat_MPIAIJ*) A->data;
5945   if (lvec) *lvec = a->lvec;
5946   if (colmap) *colmap = a->colmap;
5947   if (multScatter) *multScatter = a->Mvctx;
5948   PetscFunctionReturn(0);
5949 }
5950 
5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5954 #if defined(PETSC_HAVE_MKL_SPARSE)
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5956 #endif
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5959 #if defined(PETSC_HAVE_ELEMENTAL)
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5961 #endif
5962 #if defined(PETSC_HAVE_SCALAPACK)
5963 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5964 #endif
5965 #if defined(PETSC_HAVE_HYPRE)
5966 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5967 #endif
5968 #if defined(PETSC_HAVE_CUDA)
5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5970 #endif
5971 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5972 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5973 #endif
5974 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5975 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5976 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5977 
5978 /*
5979     Computes (B'*A')' since computing A*B directly is untenable
5980 
5981                n                       p                          p
5982         [             ]       [             ]         [                 ]
5983       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5984         [             ]       [             ]         [                 ]
5985 
5986 */
5987 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5988 {
5989   PetscErrorCode ierr;
5990   Mat            At,Bt,Ct;
5991 
5992   PetscFunctionBegin;
5993   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5994   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5995   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5996   ierr = MatDestroy(&At);CHKERRQ(ierr);
5997   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5998   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5999   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
6000   PetscFunctionReturn(0);
6001 }
6002 
6003 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
6004 {
6005   PetscErrorCode ierr;
6006   PetscBool      cisdense;
6007 
6008   PetscFunctionBegin;
6009   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
6010   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
6011   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6012   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6013   if (!cisdense) {
6014     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6015   }
6016   ierr = MatSetUp(C);CHKERRQ(ierr);
6017 
6018   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6019   PetscFunctionReturn(0);
6020 }
6021 
6022 /* ----------------------------------------------------------------*/
6023 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6024 {
6025   Mat_Product *product = C->product;
6026   Mat         A = product->A,B=product->B;
6027 
6028   PetscFunctionBegin;
6029   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6030     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6031 
6032   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6033   C->ops->productsymbolic = MatProductSymbolic_AB;
6034   PetscFunctionReturn(0);
6035 }
6036 
6037 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6038 {
6039   PetscErrorCode ierr;
6040   Mat_Product    *product = C->product;
6041 
6042   PetscFunctionBegin;
6043   if (product->type == MATPRODUCT_AB) {
6044     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6045   }
6046   PetscFunctionReturn(0);
6047 }
6048 /* ----------------------------------------------------------------*/
6049 
6050 /*MC
6051    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6052 
6053    Options Database Keys:
6054 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6055 
6056    Level: beginner
6057 
6058    Notes:
6059     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6060     in this case the values associated with the rows and columns one passes in are set to zero
6061     in the matrix.
6062 
6063     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6064     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6065 
6066 .seealso: MatCreateAIJ()
6067 M*/
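/*
   A minimal sketch of creating a MATMPIAIJ matrix directly (added for illustration, not part of the
   original source; M, N and the preallocation counts are placeholders):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/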
6068 
6069 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6070 {
6071   Mat_MPIAIJ     *b;
6072   PetscErrorCode ierr;
6073   PetscMPIInt    size;
6074 
6075   PetscFunctionBegin;
6076   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6077 
6078   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6079   B->data       = (void*)b;
6080   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6081   B->assembled  = PETSC_FALSE;
6082   B->insertmode = NOT_SET_VALUES;
6083   b->size       = size;
6084 
6085   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6086 
6087   /* build cache for off array entries formed */
6088   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6089 
6090   b->donotstash  = PETSC_FALSE;
6091   b->colmap      = NULL;
6092   b->garray      = NULL;
6093   b->roworiented = PETSC_TRUE;
6094 
6095   /* stuff used for matrix vector multiply */
6096   b->lvec  = NULL;
6097   b->Mvctx = NULL;
6098 
6099   /* stuff for MatGetRow() */
6100   b->rowindices   = NULL;
6101   b->rowvalues    = NULL;
6102   b->getrowactive = PETSC_FALSE;
6103 
6104   /* flexible pointer used in CUSPARSE classes */
6105   b->spptr = NULL;
6106 
6107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6117 #if defined(PETSC_HAVE_CUDA)
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6119 #endif
6120 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6122 #endif
6123 #if defined(PETSC_HAVE_MKL_SPARSE)
6124   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6125 #endif
6126   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6129   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6130 #if defined(PETSC_HAVE_ELEMENTAL)
6131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6132 #endif
6133 #if defined(PETSC_HAVE_SCALAPACK)
6134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6135 #endif
6136   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6138 #if defined(PETSC_HAVE_HYPRE)
6139   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6140   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6141 #endif
6142   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6143   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6144   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6145   PetscFunctionReturn(0);
6146 }
6147 
6148 /*@C
6149      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6150          and "off-diagonal" part of the matrix in CSR format.
6151 
6152    Collective
6153 
6154    Input Parameters:
6155 +  comm - MPI communicator
6156 .  m - number of local rows (Cannot be PETSC_DECIDE)
6157 .  n - This value should be the same as the local size used in creating the
6158        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6159        it calculated if N is given). For square matrices n is almost always m.
6160 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6161 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6162 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6163 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6164 .   a - matrix values
6165 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6166 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6167 -   oa - matrix values
6168 
6169    Output Parameter:
6170 .   mat - the matrix
6171 
6172    Level: advanced
6173 
6174    Notes:
6175        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6176        must free the arrays once the matrix has been destroyed and not before.
6177 
6178        The i and j indices are 0 based
6179 
6180        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6181 
6182        This sets local rows and cannot be used to set off-processor values.
6183 
6184        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6185        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6186        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6187        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6188        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6189        communication if it is known that only local entries will be set.
6190 
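   Example:
       The following is a minimal sketch (for illustration only, run on exactly two MPI processes) of a
       2 by 2 matrix with value 2 on the diagonal and -1 off the diagonal, where each process owns one row
       and one column; note that j holds local indices of the diagonal block while oj holds global indices,
       and that the arrays must remain valid until the matrix is destroyed.
.vb
       PetscInt    i[]  = {0,1}, j[]  = {0};
       PetscInt    oi[] = {0,1}, oj[1];
       PetscScalar a[]  = {2.0}, oa[] = {-1.0};
       PetscMPIInt rank;
       Mat         A;

       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       oj[0] = rank ? 0 : 1;
       MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
       MatDestroy(&A);
.ve
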
6191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6192           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6193 @*/
6194 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6195 {
6196   PetscErrorCode ierr;
6197   Mat_MPIAIJ     *maij;
6198 
6199   PetscFunctionBegin;
6200   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
6201   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6202   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6203   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6204   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6205   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6206   maij = (Mat_MPIAIJ*) (*mat)->data;
6207 
6208   (*mat)->preallocated = PETSC_TRUE;
6209 
6210   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6211   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6212 
6213   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6214   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6215 
6216   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6217   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6218   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6219   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6220   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6221   PetscFunctionReturn(0);
6222 }
6223 
6224 /*
6225     Special version for direct calls from Fortran
6226 */
6227 #include <petsc/private/fortranimpl.h>
6228 
6229 /* Change these macros so they can be used in a void function */
6230 #undef CHKERRQ
6231 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6232 #undef SETERRQ2
6233 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6234 #undef SETERRQ3
6235 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6236 #undef SETERRQ
6237 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6238 
6239 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6240 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6241 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6242 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6243 #else
6244 #endif
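/* Fortran-callable version of MatSetValues() specialized for MATMPIAIJ: it follows the same logic as the
   C implementation but returns void as required for a direct Fortran call, so errors are reported through
   CHKERRABORT() via the redefined macros above instead of being returned to the caller. */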
6245 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6246 {
6247   Mat            mat  = *mmat;
6248   PetscInt       m    = *mm, n = *mn;
6249   InsertMode     addv = *maddv;
6250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6251   PetscScalar    value;
6252   PetscErrorCode ierr;
6253 
6254   MatCheckPreallocated(mat,1);
6255   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6256   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6257   {
6258     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6259     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6260     PetscBool roworiented = aij->roworiented;
6261 
6262     /* Some variables required in the macros */
6263     Mat        A                    = aij->A;
6264     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6265     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6266     MatScalar  *aa;
6267     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6268     Mat        B                    = aij->B;
6269     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6270     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6271     MatScalar  *ba;
6272     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6273      * cannot use "#if defined" inside a macro. */
6274     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6275 
6276     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6277     PetscInt  nonew = a->nonew;
6278     MatScalar *ap1,*ap2;
6279 
6280     PetscFunctionBegin;
6281     ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
6282     ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
6283     for (i=0; i<m; i++) {
6284       if (im[i] < 0) continue;
6285       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6286       if (im[i] >= rstart && im[i] < rend) {
6287         row      = im[i] - rstart;
6288         lastcol1 = -1;
6289         rp1      = aj + ai[row];
6290         ap1      = aa + ai[row];
6291         rmax1    = aimax[row];
6292         nrow1    = ailen[row];
6293         low1     = 0;
6294         high1    = nrow1;
6295         lastcol2 = -1;
6296         rp2      = bj + bi[row];
6297         ap2      = ba + bi[row];
6298         rmax2    = bimax[row];
6299         nrow2    = bilen[row];
6300         low2     = 0;
6301         high2    = nrow2;
6302 
6303         for (j=0; j<n; j++) {
6304           if (roworiented) value = v[i*n+j];
6305           else value = v[i+j*m];
6306           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6307           if (in[j] >= cstart && in[j] < cend) {
6308             col = in[j] - cstart;
6309             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6310           } else if (in[j] < 0) continue;
6311           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6312             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6313             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6314           } else {
6315             if (mat->was_assembled) {
6316               if (!aij->colmap) {
6317                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6318               }
6319 #if defined(PETSC_USE_CTABLE)
6320               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6321               col--;
6322 #else
6323               col = aij->colmap[in[j]] - 1;
6324 #endif
6325               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6326                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6327                 col  =  in[j];
6328                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6329                 B        = aij->B;
6330                 b        = (Mat_SeqAIJ*)B->data;
6331                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6332                 rp2      = bj + bi[row];
6333                 ap2      = ba + bi[row];
6334                 rmax2    = bimax[row];
6335                 nrow2    = bilen[row];
6336                 low2     = 0;
6337                 high2    = nrow2;
6338                 bm       = aij->B->rmap->n;
6339                 ba       = b->a;
6340                 inserted = PETSC_FALSE;
6341               }
6342             } else col = in[j];
6343             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6344           }
6345         }
6346       } else if (!aij->donotstash) {
6347         if (roworiented) {
6348           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6349         } else {
6350           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6351         }
6352       }
6353     }
6354     ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
6355     ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
6356   }
6357   PetscFunctionReturnVoid();
6358 }
6359 
6360 typedef struct {
6361   Mat       *mp;    /* intermediate products */
6362   PetscBool *mptmp; /* is the intermediate product temporary? */
6363   PetscInt  cp;     /* number of intermediate products */
6364 
6365   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6366   PetscInt    *startsj_s,*startsj_r;
6367   PetscScalar *bufa;
6368   Mat         P_oth;
6369 
6370   /* may take advantage of merging product->B */
6371   Mat Bloc; /* B-local by merging diag and off-diag */
6372 
6373   /* cusparse does not support splitting the symbolic and numeric phases.
6374      When api_user is true, we do not need to update the numerical values
6375      of the temporary storage */
6376   PetscBool reusesym;
6377 
6378   /* support for COO values insertion */
6379   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and are used as MPI recv/send buffers respectively */
6380   PetscInt     **own; /* own[i] points to the on-process COO indices for Mat mp[i] */
6381   PetscInt     **off; /* off[i] points to the off-process COO indices for Mat mp[i] */
6382   PetscBool    hasoffproc; /* if true, there is off-process values insertion (i.e. AtB or PtAP) */
6383   PetscSF      sf; /* used for inserting non-local values and for memory allocation */
6384   PetscMemType mtype;
6385 
6386   /* customization */
6387   PetscBool abmerge;
6388   PetscBool P_oth_bind;
6389 } MatMatMPIAIJBACKEND;
6390 
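
/* A MatMatMPIAIJBACKEND context is created and attached to C->product->data by
   MatProductSymbolic_MPIAIJBACKEND(), reused by MatProductNumeric_MPIAIJBACKEND() to recompute the COO
   values of C, and released through MatDestroy_MatMatMPIAIJBACKEND() below. */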
6391 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6392 {
6393   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6394   PetscInt            i;
6395   PetscErrorCode      ierr;
6396 
6397   PetscFunctionBegin;
6398   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6399   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6400   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6401   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6402   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6403   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6404   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6405   for (i = 0; i < mmdata->cp; i++) {
6406     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6407   }
6408   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6409   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6410   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6411   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6412   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6413   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6414   PetscFunctionReturn(0);
6415 }
6416 
6417 /* Copy the n entries of A's data array at the positions given in idx[] to v[].
6418    If idx is NULL, copy the whole data array of A to v[]
6419  */
6420 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6421 {
6422   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6423   PetscErrorCode ierr;
6424 
6425   PetscFunctionBegin;
6426   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6427   if (f) {
6428     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6429   } else {
6430     const PetscScalar *vv;
6431 
6432     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6433     if (n && idx) {
6434       PetscScalar    *w = v;
6435       const PetscInt *oi = idx;
6436       PetscInt       j;
6437 
6438       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6439     } else {
6440       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6441     }
6442     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6443   }
6444   PetscFunctionReturn(0);
6445 }
6446 
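/* Numeric phase of the backend product: refresh the temporary matrices (P_oth and/or Bloc) unless they are
   still valid from the symbolic phase (reusesym), run the numeric phase of each intermediate local product,
   copy their values into the COO buffers (scattering off-process contributions through the PetscSF), and
   finally insert the values into C with MatSetValuesCOO(). */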
6447 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6448 {
6449   MatMatMPIAIJBACKEND *mmdata;
6450   PetscInt            i,n_d,n_o;
6451   PetscErrorCode      ierr;
6452 
6453   PetscFunctionBegin;
6454   MatCheckProduct(C,1);
6455   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6456   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6457   if (!mmdata->reusesym) { /* update temporary matrices */
6458     if (mmdata->P_oth) {
6459       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6460     }
6461     if (mmdata->Bloc) {
6462       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6463     }
6464   }
6465   mmdata->reusesym = PETSC_FALSE;
6466 
6467   for (i = 0; i < mmdata->cp; i++) {
6468     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6469     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6470   }
6471   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6472     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6473 
6474     if (mmdata->mptmp[i]) continue;
6475     if (noff) {
6476       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6477 
6478       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6479       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6480       n_o += noff;
6481       n_d += nown;
6482     } else {
6483       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6484 
6485       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6486       n_d += mm->nz;
6487     }
6488   }
6489   if (mmdata->hasoffproc) { /* off-process insertion */
6490     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6491     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6492   }
6493   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6494   PetscFunctionReturn(0);
6495 }
6496 
6497 /* Support for Pt * A, A * P, or Pt * A * P */
6498 #define MAX_NUMBER_INTERMEDIATE 4
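/* Symbolic phase: C is assembled from at most MAX_NUMBER_INTERMEDIATE intermediate sequential products
   built from the diagonal (A_diag, P_diag) and off-diagonal (A_off, P_off) blocks:
     MATPRODUCT_AB  : A_diag * P_loc (or A_diag * P_diag plus A_diag * P_off) plus A_off * P_oth
     MATPRODUCT_AtB : P_diag^t * A_loc plus P_off^t * A_loc, with off-process rows scattered to their owners
     MATPRODUCT_PtAP: P_loc^t * A_diag * P_loc plus P_loc^t * (A_off * P_oth)
   The (i,j) locations of the intermediate products are mapped to global indices of C and preallocated with
   MatSetPreallocationCOO(); the numeric phase then fills the values with MatSetValuesCOO(). */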
6499 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6500 {
6501   Mat_Product            *product = C->product;
6502   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6503   Mat_MPIAIJ             *a,*p;
6504   MatMatMPIAIJBACKEND    *mmdata;
6505   ISLocalToGlobalMapping P_oth_l2g = NULL;
6506   IS                     glob = NULL;
6507   const char             *prefix;
6508   char                   pprefix[256];
6509   const PetscInt         *globidx,*P_oth_idx;
6510   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6511   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6512                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6513                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6514   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
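  /* For example, with cmapt[i] == 1 a local column lc of mp[i] corresponds to global column lc + C->cmap->rstart
     of C, while with cmapt[i] == 2 it corresponds to the global column cmapa[i][lc] */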
6515 
6516   MatProductType         ptype;
6517   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6518   PetscMPIInt            size;
6519   PetscErrorCode         ierr;
6520 
6521   PetscFunctionBegin;
6522   MatCheckProduct(C,1);
6523   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6524   ptype = product->type;
6525   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6526     ptype = MATPRODUCT_AB;
6527     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6528   }
6529   switch (ptype) {
6530   case MATPRODUCT_AB:
6531     A = product->A;
6532     P = product->B;
6533     m = A->rmap->n;
6534     n = P->cmap->n;
6535     M = A->rmap->N;
6536     N = P->cmap->N;
6537     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6538     break;
6539   case MATPRODUCT_AtB:
6540     P = product->A;
6541     A = product->B;
6542     m = P->cmap->n;
6543     n = A->cmap->n;
6544     M = P->cmap->N;
6545     N = A->cmap->N;
6546     hasoffproc = PETSC_TRUE;
6547     break;
6548   case MATPRODUCT_PtAP:
6549     A = product->A;
6550     P = product->B;
6551     m = P->cmap->n;
6552     n = P->cmap->n;
6553     M = P->cmap->N;
6554     N = P->cmap->N;
6555     hasoffproc = PETSC_TRUE;
6556     break;
6557   default:
6558     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6559   }
6560   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6561   if (size == 1) hasoffproc = PETSC_FALSE;
6562 
6563   /* defaults */
6564   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6565     mp[i]    = NULL;
6566     mptmp[i] = PETSC_FALSE;
6567     rmapt[i] = -1;
6568     cmapt[i] = -1;
6569     rmapa[i] = NULL;
6570     cmapa[i] = NULL;
6571   }
6572 
6573   /* customization */
6574   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6575   mmdata->reusesym = product->api_user;
6576   if (ptype == MATPRODUCT_AB) {
6577     if (product->api_user) {
6578       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6579       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6580       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6581       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6582     } else {
6583       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6584       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6585       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6586       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6587     }
6588   } else if (ptype == MATPRODUCT_PtAP) {
6589     if (product->api_user) {
6590       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6591       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6592       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6593     } else {
6594       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6595       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6596       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6597     }
6598   }
6599   a = (Mat_MPIAIJ*)A->data;
6600   p = (Mat_MPIAIJ*)P->data;
6601   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6602   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6603   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6604   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6605   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6606 
6607   cp   = 0;
6608   switch (ptype) {
6609   case MATPRODUCT_AB: /* A * P */
6610     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6611 
6612     /* A_diag * P_local (merged or not) */
6613     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6614       /* P is product->B */
6615       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6616       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6617       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6618       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6619       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6620       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6621       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6622       mp[cp]->product->api_user = product->api_user;
6623       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6624       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6625       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6626       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6627       rmapt[cp] = 1;
6628       cmapt[cp] = 2;
6629       cmapa[cp] = globidx;
6630       mptmp[cp] = PETSC_FALSE;
6631       cp++;
6632     } else { /* A_diag * P_diag and A_diag * P_off */
6633       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6634       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6635       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6636       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6637       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6638       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6639       mp[cp]->product->api_user = product->api_user;
6640       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6641       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6642       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6643       rmapt[cp] = 1;
6644       cmapt[cp] = 1;
6645       mptmp[cp] = PETSC_FALSE;
6646       cp++;
6647       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6648       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6649       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6650       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6651       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6652       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6653       mp[cp]->product->api_user = product->api_user;
6654       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6655       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6656       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6657       rmapt[cp] = 1;
6658       cmapt[cp] = 2;
6659       cmapa[cp] = p->garray;
6660       mptmp[cp] = PETSC_FALSE;
6661       cp++;
6662     }
6663 
6664     /* A_off * P_other */
6665     if (mmdata->P_oth) {
6666       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6667       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6668       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6669       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6670       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6671       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6672       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6673       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6674       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6675       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6676       mp[cp]->product->api_user = product->api_user;
6677       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6678       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6679       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6680       rmapt[cp] = 1;
6681       cmapt[cp] = 2;
6682       cmapa[cp] = P_oth_idx;
6683       mptmp[cp] = PETSC_FALSE;
6684       cp++;
6685     }
6686     break;
6687 
6688   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6689     /* A is product->B */
6690     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6691     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6692       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6693       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6694       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6695       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6696       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6697       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6698       mp[cp]->product->api_user = product->api_user;
6699       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6700       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6701       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6702       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6703       rmapt[cp] = 2;
6704       rmapa[cp] = globidx;
6705       cmapt[cp] = 2;
6706       cmapa[cp] = globidx;
6707       mptmp[cp] = PETSC_FALSE;
6708       cp++;
6709     } else {
6710       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6711       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6712       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6713       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6714       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6715       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6716       mp[cp]->product->api_user = product->api_user;
6717       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6718       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6719       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6720       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6721       rmapt[cp] = 1;
6722       cmapt[cp] = 2;
6723       cmapa[cp] = globidx;
6724       mptmp[cp] = PETSC_FALSE;
6725       cp++;
6726       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6727       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6728       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6729       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6730       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6731       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6732       mp[cp]->product->api_user = product->api_user;
6733       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6734       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6735       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6736       rmapt[cp] = 2;
6737       rmapa[cp] = p->garray;
6738       cmapt[cp] = 2;
6739       cmapa[cp] = globidx;
6740       mptmp[cp] = PETSC_FALSE;
6741       cp++;
6742     }
6743     break;
6744   case MATPRODUCT_PtAP:
6745     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6746     /* P is product->B */
6747     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6748     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6749     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6750     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6751     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6752     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6753     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6754     mp[cp]->product->api_user = product->api_user;
6755     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6756     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6757     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6758     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6759     rmapt[cp] = 2;
6760     rmapa[cp] = globidx;
6761     cmapt[cp] = 2;
6762     cmapa[cp] = globidx;
6763     mptmp[cp] = PETSC_FALSE;
6764     cp++;
6765     if (mmdata->P_oth) {
6766       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6767       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6768       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6769       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6770       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6771       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6772       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6773       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6774       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6775       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6776       mp[cp]->product->api_user = product->api_user;
6777       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6778       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6779       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6780       mptmp[cp] = PETSC_TRUE;
6781       cp++;
6782       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6783       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6784       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6785       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6786       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6787       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6788       mp[cp]->product->api_user = product->api_user;
6789       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6790       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6791       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6792       rmapt[cp] = 2;
6793       rmapa[cp] = globidx;
6794       cmapt[cp] = 2;
6795       cmapa[cp] = P_oth_idx;
6796       mptmp[cp] = PETSC_FALSE;
6797       cp++;
6798     }
6799     break;
6800   default:
6801     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6802   }
6803   /* sanity check */
6804   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6805 
6806   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6807   for (i = 0; i < cp; i++) {
6808     mmdata->mp[i]    = mp[i];
6809     mmdata->mptmp[i] = mptmp[i];
6810   }
6811   mmdata->cp = cp;
6812   C->product->data       = mmdata;
6813   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6814   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6815 
6816   /* memory type */
6817   mmdata->mtype = PETSC_MEMTYPE_HOST;
6818   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6819   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6820   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6821 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
6822   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6823 #endif
6824 
6825   /* prepare coo coordinates for values insertion */
6826 
6827   /* count the total nonzeros of the intermediate seqaij Mats
6828     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6829     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
6830     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6831   */
6832   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6833     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6834     if (mptmp[cp]) continue;
6835     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
6836       const PetscInt *rmap = rmapa[cp];
6837       const PetscInt mr = mp[cp]->rmap->n;
6838       const PetscInt rs = C->rmap->rstart;
6839       const PetscInt re = C->rmap->rend;
6840       const PetscInt *ii  = mm->i;
6841       for (i = 0; i < mr; i++) {
6842         const PetscInt gr = rmap[i];
6843         const PetscInt nz = ii[i+1] - ii[i];
6844         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6845         else ncoo_oown += nz; /* this row is local */
6846       }
6847     } else ncoo_d += mm->nz;
6848   }
6849 
6850   /*
6851     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6852 
6853     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other procs.
6854 
6855     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
6856 
6857     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
6858     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
6859     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
6860 
6861     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6862     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this process will receive.
6863   */
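  /* Illustrative example (made-up numbers): suppose cp = 2 and both intermediate products have rmapt[] == 2;
     if mp[0] has 3 nonzeros destined for other processes and 5 that stay local, while mp[1] has 0 and 4,
     then off[0] has length 3 with off[1] = off[0]+3 and off[2] = off[1], and own[0] has length 9 with
     own[1] = own[0]+5 and own[2] = own[1]+4 */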
6864   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6865   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6866 
6867   /* gather (i,j) of nonzeros inserted by remote procs */
6868   if (hasoffproc) {
6869     PetscSF  msf;
6870     PetscInt ncoo2,*coo_i2,*coo_j2;
6871 
6872     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6873     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6874     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6875 
6876     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6877       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6878       PetscInt   *idxoff = mmdata->off[cp];
6879       PetscInt   *idxown = mmdata->own[cp];
6880       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6881         const PetscInt *rmap = rmapa[cp];
6882         const PetscInt *cmap = cmapa[cp];
6883         const PetscInt *ii  = mm->i;
6884         PetscInt       *coi = coo_i + ncoo_o;
6885         PetscInt       *coj = coo_j + ncoo_o;
6886         const PetscInt mr = mp[cp]->rmap->n;
6887         const PetscInt rs = C->rmap->rstart;
6888         const PetscInt re = C->rmap->rend;
6889         const PetscInt cs = C->cmap->rstart;
6890         for (i = 0; i < mr; i++) {
6891           const PetscInt *jj = mm->j + ii[i];
6892           const PetscInt gr  = rmap[i];
6893           const PetscInt nz  = ii[i+1] - ii[i];
6894           if (gr < rs || gr >= re) { /* this is an offproc row */
6895             for (j = ii[i]; j < ii[i+1]; j++) {
6896               *coi++ = gr;
6897               *idxoff++ = j;
6898             }
6899             if (!cmapt[cp]) { /* already global */
6900               for (j = 0; j < nz; j++) *coj++ = jj[j];
6901             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6902               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6903             } else { /* offdiag */
6904               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6905             }
6906             ncoo_o += nz;
6907           } else { /* this is a local row */
6908             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6909           }
6910         }
6911       }
6912       mmdata->off[cp + 1] = idxoff;
6913       mmdata->own[cp + 1] = idxown;
6914     }
6915 
6916     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6917     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6918     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6919     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6920     ncoo = ncoo_d + ncoo_oown + ncoo2;
6921     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6922     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6923     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6924     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6925     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6926     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6927     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6928     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6929     coo_i = coo_i2;
6930     coo_j = coo_j2;
6931   } else { /* no offproc values insertion */
6932     ncoo = ncoo_d;
6933     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6934 
6935     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6936     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6937     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6938   }
6939   mmdata->hasoffproc = hasoffproc;
6940 
6941    /* gather (i,j) of nonzeros inserted locally */
6942   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6943     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6944     PetscInt       *coi = coo_i + ncoo_d;
6945     PetscInt       *coj = coo_j + ncoo_d;
6946     const PetscInt *jj  = mm->j;
6947     const PetscInt *ii  = mm->i;
6948     const PetscInt *cmap = cmapa[cp];
6949     const PetscInt *rmap = rmapa[cp];
6950     const PetscInt mr = mp[cp]->rmap->n;
6951     const PetscInt rs = C->rmap->rstart;
6952     const PetscInt re = C->rmap->rend;
6953     const PetscInt cs = C->cmap->rstart;
6954 
6955     if (mptmp[cp]) continue;
6956     if (rmapt[cp] == 1) { /* consecutive rows */
6957       /* fill coo_i */
6958       for (i = 0; i < mr; i++) {
6959         const PetscInt gr = i + rs;
6960         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6961       }
6962       /* fill coo_j */
6963       if (!cmapt[cp]) { /* type-0, already global */
6964         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6965       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6966         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6967       } else { /* type-2, local to global for sparse columns */
6968         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6969       }
6970       ncoo_d += mm->nz;
6971     } else if (rmapt[cp] == 2) { /* sparse rows */
6972       for (i = 0; i < mr; i++) {
6973         const PetscInt *jj = mm->j + ii[i];
6974         const PetscInt gr  = rmap[i];
6975         const PetscInt nz  = ii[i+1] - ii[i];
6976         if (gr >= rs && gr < re) { /* local rows */
6977           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6978           if (!cmapt[cp]) { /* type-0, already global */
6979             for (j = 0; j < nz; j++) *coj++ = jj[j];
6980           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6981             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6982           } else { /* type-2, local to global for sparse columns */
6983             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6984           }
6985           ncoo_d += nz;
6986         }
6987       }
6988     }
6989   }
6990   if (glob) {
6991     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6992   }
6993   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6994   if (P_oth_l2g) {
6995     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6996   }
6997   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6998   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6999   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
7000 
7001   /* preallocate with COO data */
7002   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
7003   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
7004   PetscFunctionReturn(0);
7005 }
7006 
7007 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7008 {
7009   Mat_Product    *product = mat->product;
7010   PetscErrorCode ierr;
7011 #if defined(PETSC_HAVE_DEVICE)
7012   PetscBool      match = PETSC_FALSE;
7013   PetscBool      usecpu = PETSC_FALSE;
7014 #else
7015   PetscBool      match = PETSC_TRUE;
7016 #endif
7017 
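  /* When PETSc is configured with device support, the backend can be bypassed per product type through the
     options database, e.g. -matmatmult_backend_cpu, -mattransposematmult_backend_cpu or -matptap_backend_cpu
     when using the user-level API, or -matproduct_ab_backend_cpu, -matproduct_atb_backend_cpu and
     -matproduct_ptap_backend_cpu when using the MatProduct API */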
7018   PetscFunctionBegin;
7019   MatCheckProduct(mat,1);
7020 #if defined(PETSC_HAVE_DEVICE)
7021   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7022     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7023   }
7024   if (match) { /* we can always fall back to the CPU if requested */
7025     switch (product->type) {
7026     case MATPRODUCT_AB:
7027       if (product->api_user) {
7028         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7029         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7030         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7031       } else {
7032         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7033         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7034         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7035       }
7036       break;
7037     case MATPRODUCT_AtB:
7038       if (product->api_user) {
7039         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7040         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7041         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7042       } else {
7043         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7044         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7045         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7046       }
7047       break;
7048     case MATPRODUCT_PtAP:
7049       if (product->api_user) {
7050         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7051         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7052         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7053       } else {
7054         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7055         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7056         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7057       }
7058       break;
7059     default:
7060       break;
7061     }
7062     match = (PetscBool)!usecpu;
7063   }
7064 #endif
7065   if (match) {
7066     switch (product->type) {
7067     case MATPRODUCT_AB:
7068     case MATPRODUCT_AtB:
7069     case MATPRODUCT_PtAP:
7070       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7071       break;
7072     default:
7073       break;
7074     }
7075   }
7076   /* fallback to MPIAIJ ops */
7077   if (!mat->ops->productsymbolic) {
7078     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7079   }
7080   PetscFunctionReturn(0);
7081 }
7082