xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f155c23239e6d3f7c7ec79ff00b4f28519d0ce99)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15    for communicators controlling multiple processes.  It is recommended that you call both of
16    the above preallocation routines for simplicity; a minimal sketch is given below.
17 
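   Example Usage:
   A minimal sketch of the recommended call sequence follows; the sizes m, n, M, N and the preallocation
   counts d_nz and o_nz are illustrative placeholders, not values prescribed by this implementation.
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);            /* used when the communicator has a single process */
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  /* used when it has multiple processes */
.ve
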
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The type also
23     automatically switches over to use inodes when enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62 
63   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
64    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
65    * to differ from the parent matrix. */
66   if (a->lvec) {
67     ierr = VecBindToCPU(a->lvec,flg);CHKERRQ(ierr);
68   }
69   if (a->diag) {
70     ierr = VecBindToCPU(a->diag,flg);CHKERRQ(ierr);
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
77 {
78   PetscErrorCode ierr;
79   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
80 
81   PetscFunctionBegin;
82   if (mat->A) {
83     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
84     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
90 {
91   PetscErrorCode  ierr;
92   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
93   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
94   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
95   const PetscInt  *ia,*ib;
96   const MatScalar *aa,*bb,*aav,*bav;
97   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
98   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
99 
100   PetscFunctionBegin;
101   *keptrows = NULL;
102 
103   ia   = a->i;
104   ib   = b->i;
105   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
106   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
107   for (i=0; i<m; i++) {
108     na = ia[i+1] - ia[i];
109     nb = ib[i+1] - ib[i];
110     if (!na && !nb) {
111       cnt++;
112       goto ok1;
113     }
114     aa = aav + ia[i];
115     for (j=0; j<na; j++) {
116       if (aa[j] != 0.0) goto ok1;
117     }
118     bb = bav + ib[i];
119     for (j=0; j <nb; j++) {
120       if (bb[j] != 0.0) goto ok1;
121     }
122     cnt++;
123 ok1:;
124   }
125   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
126   if (!n0rows) {
127     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
128     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
129     PetscFunctionReturn(0);
130   }
131   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
132   cnt  = 0;
133   for (i=0; i<m; i++) {
134     na = ia[i+1] - ia[i];
135     nb = ib[i+1] - ib[i];
136     if (!na && !nb) continue;
137     aa = aav + ia[i];
138     for (j=0; j<na;j++) {
139       if (aa[j] != 0.0) {
140         rows[cnt++] = rstart + i;
141         goto ok2;
142       }
143     }
144     bb = bav + ib[i];
145     for (j=0; j<nb; j++) {
146       if (bb[j] != 0.0) {
147         rows[cnt++] = rstart + i;
148         goto ok2;
149       }
150     }
151 ok2:;
152   }
153   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
154   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
155   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
160 {
161   PetscErrorCode    ierr;
162   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
163   PetscBool         cong;
164 
165   PetscFunctionBegin;
166   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
167   if (Y->assembled && cong) {
168     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
169   } else {
170     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
171   }
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
176 {
177   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
178   PetscErrorCode ierr;
179   PetscInt       i,rstart,nrows,*rows;
180 
181   PetscFunctionBegin;
182   *zrows = NULL;
183   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
184   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
185   for (i=0; i<nrows; i++) rows[i] += rstart;
186   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
191 {
192   PetscErrorCode    ierr;
193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
194   PetscInt          i,m,n,*garray = aij->garray;
195   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
196   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
197   PetscReal         *work;
198   const PetscScalar *dummy;
199 
200   PetscFunctionBegin;
201   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
202   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
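  /* The two get/restore pairs below do not otherwise use the returned pointer; as far as we can tell they
     are here so that the raw a_aij->a and b_aij->a host arrays read in the loops below are up to date
     (e.g. after assembly on a device).  Each rank accumulates into a work array of global length n indexed
     by global column, and the per-column results are combined afterwards with a single MPI reduction. */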
203   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
204   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
205   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
206   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
207   if (type == NORM_2) {
208     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
209       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
210     }
211     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
212       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
213     }
214   } else if (type == NORM_1) {
215     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
216       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
217     }
218     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
219       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
220     }
221   } else if (type == NORM_INFINITY) {
222     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
223       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
224     }
225     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
226       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
227     }
228   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
229     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
230       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
231     }
232     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
233       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
234     }
235   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
236     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
237       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
238     }
239     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
240       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
241     }
242   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
243   if (type == NORM_INFINITY) {
244     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
245   } else {
246     ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
247   }
248   ierr = PetscFree(work);CHKERRQ(ierr);
249   if (type == NORM_2) {
250     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
251   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
252     for (i=0; i<n; i++) reductions[i] /= m;
253   }
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
258 {
259   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
260   IS              sis,gis;
261   PetscErrorCode  ierr;
262   const PetscInt  *isis,*igis;
263   PetscInt        n,*iis,nsis,ngis,rstart,i;
264 
265   PetscFunctionBegin;
266   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
267   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
268   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
269   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
270   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
271   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
272 
273   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
274   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
275   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
276   n    = ngis + nsis;
277   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
278   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
279   for (i=0; i<n; i++) iis[i] += rstart;
280   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
281 
282   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
283   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
284   ierr = ISDestroy(&sis);CHKERRQ(ierr);
285   ierr = ISDestroy(&gis);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 /*
290   Local utility routine that creates a mapping from the global column
291 number to the local number in the off-diagonal part of the local
292 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
293 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
294 stores an order-N integer array) but is fast to access.
295 */
296 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
297 {
298   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
299   PetscErrorCode ierr;
300   PetscInt       n = aij->B->cmap->n,i;
301 
302   PetscFunctionBegin;
303   PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
304 #if defined(PETSC_USE_CTABLE)
305   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
306   for (i=0; i<n; i++) {
307     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
308   }
309 #else
310   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
311   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
312   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
313 #endif
314   PetscFunctionReturn(0);
315 }
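/* The colmap built above is queried later in this file as, e.g., PetscTableFind(aij->colmap,in[j]+1,&col);
   col--; in the PETSC_USE_CTABLE case, or col = aij->colmap[in[j]] - 1; otherwise; a negative col after the
   shift means that global column does not occur in the off-diagonal block.  See MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below. */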
316 
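/* The two macros below insert (or, with ADD_VALUES, add to) the entry (row,col) in the diagonal block
   (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block (MatSetValues_SeqAIJ_B_Private) of an MPIAIJ
   matrix, keeping each row's column indices sorted and growing the row's storage when it is full.  The
   caller, MatSetValues_MPIAIJ() below, is expected to have set up the row pointers rp1/rp2 (column
   indices), ap1/ap2 (values), the current row lengths nrow1/nrow2, the search bounds low/high, and
   lastcol, which remembers the previously set column so the search range can be narrowed for sorted input. */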
317 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
318 { \
319     if (col <= lastcol1)  low1 = 0;     \
320     else                 high1 = nrow1; \
321     lastcol1 = col;\
322     while (high1-low1 > 5) { \
323       t = (low1+high1)/2; \
324       if (rp1[t] > col) high1 = t; \
325       else              low1  = t; \
326     } \
327       for (_i=low1; _i<high1; _i++) { \
328         if (rp1[_i] > col) break; \
329         if (rp1[_i] == col) { \
330           if (addv == ADD_VALUES) { \
331             ap1[_i] += value;   \
332             /* Not sure whether PetscLogFlops() will slow down the code */ \
333             (void)PetscLogFlops(1.0);   \
334            } \
335           else                    ap1[_i] = value; \
336           goto a_noinsert; \
337         } \
338       }  \
339       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
340       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
341       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
342       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
343       N = nrow1++ - 1; a->nz++; high1++; \
344       /* shift up all the later entries in this row */ \
345       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
346       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
347       rp1[_i] = col;  \
348       ap1[_i] = value;  \
349       A->nonzerostate++;\
350       a_noinsert: ; \
351       ailen[row] = nrow1; \
352 }
353 
354 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
355   { \
356     if (col <= lastcol2) low2 = 0;                        \
357     else high2 = nrow2;                                   \
358     lastcol2 = col;                                       \
359     while (high2-low2 > 5) {                              \
360       t = (low2+high2)/2;                                 \
361       if (rp2[t] > col) high2 = t;                        \
362       else             low2  = t;                         \
363     }                                                     \
364     for (_i=low2; _i<high2; _i++) {                       \
365       if (rp2[_i] > col) break;                           \
366       if (rp2[_i] == col) {                               \
367         if (addv == ADD_VALUES) {                         \
368           ap2[_i] += value;                               \
369           (void)PetscLogFlops(1.0);                       \
370         }                                                 \
371         else                    ap2[_i] = value;          \
372         goto b_noinsert;                                  \
373       }                                                   \
374     }                                                     \
375     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
376     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
377     PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
378     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
379     N = nrow2++ - 1; b->nz++; high2++;                    \
380     /* shift up all the later entries in this row */      \
381     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
382     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
383     rp2[_i] = col;                                        \
384     ap2[_i] = value;                                      \
385     B->nonzerostate++;                                    \
386     b_noinsert: ;                                         \
387     bilen[row] = nrow2;                                   \
388   }
389 
390 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
391 {
392   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
393   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
394   PetscErrorCode ierr;
395   PetscInt       l,*garray = mat->garray,diag;
396   PetscScalar    *aa,*ba;
397 
398   PetscFunctionBegin;
399   /* code only works for square matrices A */
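  /* v is taken to hold this row's stored values ordered by global column: the entries left of the diagonal
     block are copied into B, then the diagonal-block entries into A, then the remaining entries into the
     rest of B's row. */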
400 
401   /* find size of row to the left of the diagonal part */
402   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
403   row  = row - diag;
404   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
405     if (garray[b->j[b->i[row]+l]] > diag) break;
406   }
407   if (l) {
408     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
409     ierr = PetscArraycpy(ba+b->i[row],v,l);CHKERRQ(ierr);
410     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
411   }
412 
413   /* diagonal part */
414   if (a->i[row+1]-a->i[row]) {
415     ierr = MatSeqAIJGetArray(mat->A,&aa);CHKERRQ(ierr);
416     ierr = PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
417     ierr = MatSeqAIJRestoreArray(mat->A,&aa);CHKERRQ(ierr);
418   }
419 
420   /* right of diagonal part */
421   if (b->i[row+1]-b->i[row]-l) {
422     ierr = MatSeqAIJGetArray(mat->B,&ba);CHKERRQ(ierr);
423     ierr = PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
424     ierr = MatSeqAIJRestoreArray(mat->B,&ba);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
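/* MatSetValues() for MPIAIJ: entries in locally owned rows go directly into the diagonal block aij->A or,
   after translating the global column index through colmap/garray, into the off-diagonal block aij->B
   (disassembling B if a brand-new off-diagonal column shows up after the matrix was already assembled);
   entries for rows owned by other processes are placed in the stash and communicated during
   MatAssemblyBegin/End (off-process entries are an error when MAT_NO_OFF_PROC_ENTRIES is set, and are
   dropped when donotstash is set). */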
429 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
430 {
431   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
432   PetscScalar    value = 0.0;
433   PetscErrorCode ierr;
434   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
435   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
436   PetscBool      roworiented = aij->roworiented;
437 
438   /* Some Variables required in the macro */
439   Mat        A                    = aij->A;
440   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
441   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
442   PetscBool  ignorezeroentries    = a->ignorezeroentries;
443   Mat        B                    = aij->B;
444   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
445   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
446   MatScalar  *aa,*ba;
447   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
448   PetscInt   nonew;
449   MatScalar  *ap1,*ap2;
450 
451   PetscFunctionBegin;
452   ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
453   ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456     PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
457     if (im[i] >= rstart && im[i] < rend) {
458       row      = im[i] - rstart;
459       lastcol1 = -1;
460       rp1      = aj + ai[row];
461       ap1      = aa + ai[row];
462       rmax1    = aimax[row];
463       nrow1    = ailen[row];
464       low1     = 0;
465       high1    = nrow1;
466       lastcol2 = -1;
467       rp2      = bj + bi[row];
468       ap2      = ba + bi[row];
469       rmax2    = bimax[row];
470       nrow2    = bilen[row];
471       low2     = 0;
472       high2    = nrow2;
473 
474       for (j=0; j<n; j++) {
475         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
476         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
477         if (in[j] >= cstart && in[j] < cend) {
478           col   = in[j] - cstart;
479           nonew = a->nonew;
480           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
481         } else if (in[j] < 0) continue;
482         else if (in[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
483         else {
484           if (mat->was_assembled) {
485             if (!aij->colmap) {
486               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
487             }
488 #if defined(PETSC_USE_CTABLE)
489             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); /* map global col ids to local ones */
490             col--;
491 #else
492             col = aij->colmap[in[j]] - 1;
493 #endif
494             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
495               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); /* Change aij->B from reduced/local format to expanded/global format */
496               col  =  in[j];
497               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
498               B        = aij->B;
499               b        = (Mat_SeqAIJ*)B->data;
500               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
501               rp2      = bj + bi[row];
502               ap2      = ba + bi[row];
503               rmax2    = bimax[row];
504               nrow2    = bilen[row];
505               low2     = 0;
506               high2    = nrow2;
507               bm       = aij->B->rmap->n;
508               ba       = b->a;
509             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
510               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
511                 ierr = PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
512               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
513             }
514           } else col = in[j];
515           nonew = b->nonew;
516           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
517         }
518       }
519     } else {
520       PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
521       if (!aij->donotstash) {
522         mat->assembled = PETSC_FALSE;
523         if (roworiented) {
524           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
532   ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 /*
537     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
538     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
539     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
540 */
541 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
542 {
543   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
544   Mat            A           = aij->A; /* diagonal part of the matrix */
545   Mat            B           = aij->B; /* offdiagonal part of the matrix */
546   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
548   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
549   PetscInt       *ailen      = a->ilen,*aj = a->j;
550   PetscInt       *bilen      = b->ilen,*bj = b->j;
551   PetscInt       am          = aij->A->rmap->n,j;
552   PetscInt       diag_so_far = 0,dnz;
553   PetscInt       offd_so_far = 0,onz;
554 
555   PetscFunctionBegin;
556   /* Iterate over all rows of the matrix */
557   for (j=0; j<am; j++) {
558     dnz = onz = 0;
559     /*  Iterate over all non-zero columns of the current row */
560     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
561       /* If column is in the diagonal */
562       if (mat_j[col] >= cstart && mat_j[col] < cend) {
563         aj[diag_so_far++] = mat_j[col] - cstart;
564         dnz++;
565       } else { /* off-diagonal entries */
566         bj[offd_so_far++] = mat_j[col];
567         onz++;
568       }
569     }
570     ailen[j] = dnz;
571     bilen[j] = onz;
572   }
573   PetscFunctionReturn(0);
574 }
575 
576 /*
577     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
578     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
579     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
580     Also, mat->was_assembled has to be false; otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
581     would not hold and the more complex MatSetValues_MPIAIJ has to be used.
582 */
583 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
584 {
585   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
586   Mat            A      = aij->A; /* diagonal part of the matrix */
587   Mat            B      = aij->B; /* offdiagonal part of the matrix */
588   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
589   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
590   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
591   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
592   PetscInt       *ailen = a->ilen,*aj = a->j;
593   PetscInt       *bilen = b->ilen,*bj = b->j;
594   PetscInt       am     = aij->A->rmap->n,j;
595   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
596   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
597   PetscScalar    *aa = a->a,*ba = b->a;
598 
599   PetscFunctionBegin;
600   /* Iterate over all rows of the matrix */
601   for (j=0; j<am; j++) {
602     dnz_row = onz_row = 0;
603     rowstart_offd = full_offd_i[j];
604     rowstart_diag = full_diag_i[j];
605     /*  Iterate over all non-zero columns of the current row */
606     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
607       /* If column is in the diagonal */
608       if (mat_j[col] >= cstart && mat_j[col] < cend) {
609         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
610         aa[rowstart_diag+dnz_row] = mat_a[col];
611         dnz_row++;
612       } else { /* off-diagonal entries */
613         bj[rowstart_offd+onz_row] = mat_j[col];
614         ba[rowstart_offd+onz_row] = mat_a[col];
615         onz_row++;
616       }
617     }
618     ailen[j] = dnz_row;
619     bilen[j] = onz_row;
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
625 {
626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
627   PetscErrorCode ierr;
628   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
629   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
630 
631   PetscFunctionBegin;
632   for (i=0; i<m; i++) {
633     if (idxm[i] < 0) continue; /* negative row */
634     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
635     if (idxm[i] >= rstart && idxm[i] < rend) {
636       row = idxm[i] - rstart;
637       for (j=0; j<n; j++) {
638         if (idxn[j] < 0) continue; /* negative column */
639         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
640         if (idxn[j] >= cstart && idxn[j] < cend) {
641           col  = idxn[j] - cstart;
642           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
643         } else {
644           if (!aij->colmap) {
645             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
646           }
647 #if defined(PETSC_USE_CTABLE)
648           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
649           col--;
650 #else
651           col = aij->colmap[idxn[j]] - 1;
652 #endif
653           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
654           else {
655             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
656           }
657         }
658       }
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
665 {
666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
667   PetscErrorCode ierr;
668   PetscInt       nstash,reallocs;
669 
670   PetscFunctionBegin;
671   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
672 
673   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   ierr = PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
676   PetscFunctionReturn(0);
677 }
678 
679 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
680 {
681   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
682   PetscErrorCode ierr;
683   PetscMPIInt    n;
684   PetscInt       i,j,rstart,ncols,flg;
685   PetscInt       *row,*col;
686   PetscBool      other_disassembled;
687   PetscScalar    *val;
688 
689   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
690 
691   PetscFunctionBegin;
692   if (!aij->donotstash && !mat->nooffprocentries) {
693     while (1) {
694       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
695       if (!flg) break;
696 
697       for (i=0; i<n;) {
698         /* Now identify the consecutive vals belonging to the same row */
699         for (j=i,rstart=row[j]; j<n; j++) {
700           if (row[j] != rstart) break;
701         }
702         if (j < n) ncols = j-i;
703         else       ncols = n-i;
704         /* Now assemble all these values with a single function call */
705         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
706         i    = j;
707       }
708     }
709     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
710   }
711 #if defined(PETSC_HAVE_DEVICE)
712   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
713   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
714   if (mat->boundtocpu) {
715     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
716     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
717   }
718 #endif
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled; if so, we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
730     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but globally it does not */
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738 #if defined(PETSC_HAVE_DEVICE)
739   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
740 #endif
741   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
742   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
743 
744   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
745 
746   aij->rowvalues = NULL;
747 
748   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
749 
750   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
751   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
752     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
753     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
754   }
755 #if defined(PETSC_HAVE_DEVICE)
756   mat->offloadmask = PETSC_OFFLOAD_BOTH;
757 #endif
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
762 {
763   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
768   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
773 {
774   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
775   PetscObjectState sA, sB;
776   PetscInt        *lrows;
777   PetscInt         r, len;
778   PetscBool        cong, lch, gch;
779   PetscErrorCode   ierr;
780 
781   PetscFunctionBegin;
782   /* get locally owned rows */
783   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
784   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
785   /* fix right hand side if needed */
786   if (x && b) {
787     const PetscScalar *xx;
788     PetscScalar       *bb;
789 
790     PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
791     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
792     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
793     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
794     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
795     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
796   }
797 
798   sA = mat->A->nonzerostate;
799   sB = mat->B->nonzerostate;
800 
801   if (diag != 0.0 && cong) {
802     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
803     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
805     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
806     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
807     PetscInt   nnwA, nnwB;
808     PetscBool  nnzA, nnzB;
809 
810     nnwA = aijA->nonew;
811     nnwB = aijB->nonew;
812     nnzA = aijA->keepnonzeropattern;
813     nnzB = aijB->keepnonzeropattern;
814     if (!nnzA) {
815       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
816       aijA->nonew = 0;
817     }
818     if (!nnzB) {
819       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
820       aijB->nonew = 0;
821     }
822     /* Must zero here before the next loop */
823     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
824     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     for (r = 0; r < len; ++r) {
826       const PetscInt row = lrows[r] + A->rmap->rstart;
827       if (row >= A->cmap->N) continue;
828       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
829     }
830     aijA->nonew = nnwA;
831     aijB->nonew = nnwB;
832   } else {
833     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
834     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
835   }
836   ierr = PetscFree(lrows);CHKERRQ(ierr);
837   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839 
840   /* reduce nonzerostate */
841   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
842   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
843   if (gch) A->nonzerostate++;
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
848 {
849   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
850   PetscErrorCode    ierr;
851   PetscMPIInt       n = A->rmap->n;
852   PetscInt          i,j,r,m,len = 0;
853   PetscInt          *lrows,*owners = A->rmap->range;
854   PetscMPIInt       p = 0;
855   PetscSFNode       *rrows;
856   PetscSF           sf;
857   const PetscScalar *xx;
858   PetscScalar       *bb,*mask,*aij_a;
859   Vec               xmask,lmask;
860   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
861   const PetscInt    *aj, *ii,*ridx;
862   PetscScalar       *aa;
863 
864   PetscFunctionBegin;
865   /* Create SF where leaves are input rows and roots are owned rows */
866   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
867   for (r = 0; r < n; ++r) lrows[r] = -1;
868   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
869   for (r = 0; r < N; ++r) {
870     const PetscInt idx   = rows[r];
871     PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
872     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
873       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
874     }
875     rrows[r].rank  = p;
876     rrows[r].index = rows[r] - owners[p];
877   }
878   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
879   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
880   /* Collect flags for rows to be zeroed */
881   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
882   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
883   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
884   /* Compress and put in row numbers */
885   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
886   /* zero diagonal part of matrix */
887   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
888   /* handle off diagonal part of matrix */
889   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
890   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
891   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
892   for (i=0; i<len; i++) bb[lrows[i]] = 1;
893   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
894   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
897   if (x && b) { /* this code is buggy when the row and column layout don't match */
898     PetscBool cong;
899 
900     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
901     PetscCheckFalse(!cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
902     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
905     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
906   }
907   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
908   /* remove zeroed rows of off diagonal matrix */
909   ierr = MatSeqAIJGetArray(l->B,&aij_a);CHKERRQ(ierr);
910   ii = aij->i;
911   for (i=0; i<len; i++) {
912     ierr = PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
913   }
914   /* loop over all elements of off process part of matrix zeroing removed columns*/
915   if (aij->compressedrow.use) {
916     m    = aij->compressedrow.nrows;
917     ii   = aij->compressedrow.i;
918     ridx = aij->compressedrow.rindex;
919     for (i=0; i<m; i++) {
920       n  = ii[i+1] - ii[i];
921       aj = aij->j + ii[i];
922       aa = aij_a + ii[i];
923 
924       for (j=0; j<n; j++) {
925         if (PetscAbsScalar(mask[*aj])) {
926           if (b) bb[*ridx] -= *aa*xx[*aj];
927           *aa = 0.0;
928         }
929         aa++;
930         aj++;
931       }
932       ridx++;
933     }
934   } else { /* do not use compressed row format */
935     m = l->B->rmap->n;
936     for (i=0; i<m; i++) {
937       n  = ii[i+1] - ii[i];
938       aj = aij->j + ii[i];
939       aa = aij_a + ii[i];
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           if (b) bb[i] -= *aa*xx[*aj];
943           *aa = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948     }
949   }
950   if (x && b) {
951     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
952     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
953   }
954   ierr = MatSeqAIJRestoreArray(l->B,&aij_a);CHKERRQ(ierr);
955   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
956   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
957   ierr = PetscFree(lrows);CHKERRQ(ierr);
958 
959   /* only change matrix nonzero state if pattern was allowed to be changed */
960   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
961     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
962     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
963   }
964   PetscFunctionReturn(0);
965 }
966 
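/* y = A*x for MPIAIJ: start the scatter of the needed off-process entries of x into a->lvec, overlap that
   communication with the product against the local diagonal block a->A, then complete the scatter and add
   in the off-diagonal contribution a->B * a->lvec. */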
967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971   PetscInt       nt;
972   VecScatter     Mvctx = a->Mvctx;
973 
974   PetscFunctionBegin;
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
977   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
979   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
985 {
986   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   VecScatter     Mvctx = a->Mvctx;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   /* do nondiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   /* do local part */
1017   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1018   /* add partial results together */
1019   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1025 {
1026   MPI_Comm       comm;
1027   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1028   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1029   IS             Me,Notme;
1030   PetscErrorCode ierr;
1031   PetscInt       M,N,first,last,*notme,i;
1032   PetscBool      lf;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: symmetric diagonal block */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1039   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1040   if (!*f) PetscFunctionReturn(0);
1041   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1042   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1043   if (size == 1) PetscFunctionReturn(0);
1044 
1045   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1046   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1047   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1049   for (i=0; i<first; i++) notme[i] = i;
1050   for (i=last; i<M; i++) notme[i-last+first] = i;
1051   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1052   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1053   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1054   Aoff = Aoffs[0];
1055   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1056   Boff = Boffs[0];
1057   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1059   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1061   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1062   ierr = PetscFree(notme);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   /* do nondiagonal part */
1082   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1083   /* do local part */
1084   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1085   /* add partial results together */
1086   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /*
1092   This only works correctly for square matrices where the subblock A->A is the
1093    diagonal block
1094 */
1095 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099 
1100   PetscFunctionBegin;
1101   PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1102   PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1103   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1114   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1119 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1120 {
1121   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscSFDestroy(&aij->coo_sf);CHKERRQ(ierr);
1126   ierr = PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);CHKERRQ(ierr);
1127   ierr = PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);CHKERRQ(ierr);
1128   ierr = PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);CHKERRQ(ierr);
1129   ierr = PetscFree2(aij->sendbuf,aij->recvbuf);CHKERRQ(ierr);
1130   ierr = PetscFree(aij->Cperm1);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1135 {
1136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140 #if defined(PETSC_USE_LOG)
1141   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1142 #endif
1143   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1144   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1145   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1146   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1147 #if defined(PETSC_USE_CTABLE)
1148   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1149 #else
1150   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1151 #endif
1152   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1153   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1154   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1155   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1156   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1157 
1158   /* Free COO */
1159   ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr);
1160 
1161   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1162 
1163   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1164   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1165 
1166   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1175   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1176 #if defined(PETSC_HAVE_CUDA)
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1178 #endif
1179 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1181 #endif
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1183 #if defined(PETSC_HAVE_ELEMENTAL)
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1185 #endif
1186 #if defined(PETSC_HAVE_SCALAPACK)
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1188 #endif
1189 #if defined(PETSC_HAVE_HYPRE)
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1192 #endif
1193   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1199 #if defined(PETSC_HAVE_MKL_SPARSE)
1200   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1201 #endif
1202   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1203   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1204   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1205   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1211 {
1212   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1213   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1214   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1215   const PetscInt    *garray = aij->garray;
1216   const PetscScalar *aa,*ba;
1217   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1218   PetscInt          *rowlens;
1219   PetscInt          *colidxs;
1220   PetscScalar       *matvals;
1221   PetscErrorCode    ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1225 
1226   M  = mat->rmap->N;
1227   N  = mat->cmap->N;
1228   m  = mat->rmap->n;
1229   rs = mat->rmap->rstart;
1230   cs = mat->cmap->rstart;
1231   nz = A->nz + B->nz;
1232 
1233   /* write matrix header */
1234   header[0] = MAT_FILE_CLASSID;
1235   header[1] = M; header[2] = N; header[3] = nz;
1236   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1237   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1238 
1239   /* fill in and store row lengths  */
1240   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1241   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1242   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1243   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1244 
1245   /* fill in and store column indices */
1246   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1247   for (cnt=0, i=0; i<m; i++) {
1248     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1249       if (garray[B->j[jb]] > cs) break;
1250       colidxs[cnt++] = garray[B->j[jb]];
1251     }
1252     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1253       colidxs[cnt++] = A->j[ja] + cs;
1254     for (; jb<B->i[i+1]; jb++)
1255       colidxs[cnt++] = garray[B->j[jb]];
1256   }
1257   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1258   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1259   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1260 
1261   /* fill in and store nonzero values */
1262   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1263   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1264   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1265   for (cnt=0, i=0; i<m; i++) {
1266     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1267       if (garray[B->j[jb]] > cs) break;
1268       matvals[cnt++] = ba[jb];
1269     }
1270     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1271       matvals[cnt++] = aa[ja];
1272     for (; jb<B->i[i+1]; jb++)
1273       matvals[cnt++] = ba[jb];
1274   }
1275   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1276   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1277   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1278   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1279   ierr = PetscFree(matvals);CHKERRQ(ierr);
1280 
1281   /* write block size option to the viewer's .info file */
1282   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
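/*
   A minimal usage sketch of the binary path above (kept under "#if 0", so it is not compiled):
   MatView_MPIAIJ_Binary() is reached through MatView() when the viewer is a binary viewer.
   The helper name, the variable names, and the file name "A.dat" are placeholders; only the
   public PETSc calls shown are assumed.
*/
#if 0
static PetscErrorCode ExampleBinaryRoundTrip(Mat A)
{
  PetscViewer    vw;
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* write A using the parallel binary format produced above: header, row lengths, column indices, values */
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&vw);CHKERRQ(ierr);
  ierr = MatView(A,vw);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&vw);CHKERRQ(ierr);
  /* read the file back into a fresh MATAIJ matrix */
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_READ,&vw);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
  ierr = MatLoad(B,vw);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&vw);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
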
1286 #include <petscdraw.h>
1287 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1288 {
1289   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank = aij->rank,size = aij->size;
1292   PetscBool         isdraw,iascii,isbinary;
1293   PetscViewer       sviewer;
1294   PetscViewerFormat format;
1295 
1296   PetscFunctionBegin;
1297   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1298   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1300   if (iascii) {
1301     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1302     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1303       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1304       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1305       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1306       for (i=0; i<(PetscInt)size; i++) {
1307         nmax = PetscMax(nmax,nz[i]);
1308         nmin = PetscMin(nmin,nz[i]);
1309         navg += nz[i];
1310       }
1311       ierr = PetscFree(nz);CHKERRQ(ierr);
1312       navg = navg/size;
1313       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax);CHKERRQ(ierr);
1314       PetscFunctionReturn(0);
1315     }
1316     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1317     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1318       MatInfo   info;
1319       PetscInt *inodes=NULL;
1320 
1321       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1322       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1323       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1325       if (!inodes) {
1326         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1327                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1328       } else {
1329         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1330                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1331       }
1332       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1333       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1334       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1336       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1337       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1338       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1339       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1340       PetscFunctionReturn(0);
1341     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1342       PetscInt inodecount,inodelimit,*inodes;
1343       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1344       if (inodes) {
1345         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);CHKERRQ(ierr);
1346       } else {
1347         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1348       }
1349       PetscFunctionReturn(0);
1350     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1351       PetscFunctionReturn(0);
1352     }
1353   } else if (isbinary) {
1354     if (size == 1) {
1355       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1356       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1357     } else {
1358       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1359     }
1360     PetscFunctionReturn(0);
1361   } else if (iascii && size == 1) {
1362     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1363     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1364     PetscFunctionReturn(0);
1365   } else if (isdraw) {
1366     PetscDraw draw;
1367     PetscBool isnull;
1368     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1369     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1370     if (isnull) PetscFunctionReturn(0);
1371   }
1372 
1373   { /* assemble the entire matrix onto first processor */
1374     Mat A = NULL, Av;
1375     IS  isrow,iscol;
1376 
1377     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1378     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1379     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1380     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1381 /*  The commented code uses MatCreateSubMatrices instead */
1382 /*
1383     Mat *AA, A = NULL, Av;
1384     IS  isrow,iscol;
1385 
1386     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1387     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1388     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1389     if (rank == 0) {
1390        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1391        A    = AA[0];
1392        Av   = AA[0];
1393     }
1394     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1395 */
1396     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1397     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1398     /*
1399        Everyone has to participate in the viewing calls so the matrix can be drawn, since the
1400        graphics waits are synchronized across all processes that share the PetscDraw object
1401     */
1402     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1403     if (rank == 0) {
1404       if (((PetscObject)mat)->name) {
1405         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1406       }
1407       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1410     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1411     ierr = MatDestroy(&A);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1417 {
1418   PetscErrorCode ierr;
1419   PetscBool      iascii,isdraw,issocket,isbinary;
1420 
1421   PetscFunctionBegin;
1422   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1426   if (iascii || isdraw || isbinary || issocket) {
1427     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1433 {
1434   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1435   PetscErrorCode ierr;
1436   Vec            bb1 = NULL;
1437   PetscBool      hasop;
1438 
1439   PetscFunctionBegin;
1440   if (flag == SOR_APPLY_UPPER) {
1441     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1442     PetscFunctionReturn(0);
1443   }
1444 
1445   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1446     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1447   }
1448 
1449   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452       its--;
1453     }
1454 
1455     while (its--) {
1456       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1461       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1462 
1463       /* local sweep */
1464       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1465     }
1466   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1469       its--;
1470     }
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_EISENSTAT) {
1499     Vec xx1;
1500 
1501     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1502     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1503 
1504     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506     if (!mat->diag) {
1507       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1508       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1509     }
1510     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1511     if (hasop) {
1512       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1513     } else {
1514       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1515     }
1516     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1517 
1518     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1519 
1520     /* local sweep */
1521     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1522     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1523     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1524   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1525 
1526   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1527 
1528   matin->factorerrortype = mat->A->factorerrortype;
1529   PetscFunctionReturn(0);
1530 }
1531 
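/*
   A minimal sketch of calling the SOR kernel above through the public API (kept under "#if 0"):
   only the "local" sweep types (and the Eisenstat variant) are supported in parallel, as enforced
   by MatSOR_MPIAIJ(). The helper name and the parameter values are placeholders.
*/
#if 0
static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one symmetric local sweep per iteration, omega = 1.0, starting from a zero initial guess */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
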
1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1533 {
1534   Mat            aA,aB,Aperm;
1535   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1536   PetscScalar    *aa,*ba;
1537   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1538   PetscSF        rowsf,sf;
1539   IS             parcolp = NULL;
1540   PetscBool      done;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1545   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1546   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1547   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1548 
1549   /* Invert row permutation to find out where my rows should go */
1550   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1551   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1552   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1553   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1554   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1555   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1556 
1557   /* Invert column permutation to find out where my columns should go */
1558   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1559   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1560   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1561   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1562   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1563   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1567   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1568   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1569 
1570   /* Find out where my gcols should go */
1571   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1572   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1573   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1574   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1575   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1579 
1580   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1581   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1582   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt    row = rdest[i];
1585     PetscMPIInt rowner;
1586     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1587     for (j=ai[i]; j<ai[i+1]; j++) {
1588       PetscInt    col = cdest[aj[j]];
1589       PetscMPIInt cowner;
1590       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1591       if (rowner == cowner) dnnz[i]++;
1592       else onnz[i]++;
1593     }
1594     for (j=bi[i]; j<bi[i+1]; j++) {
1595       PetscInt    col = gcdest[bj[j]];
1596       PetscMPIInt cowner;
1597       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1598       if (rowner == cowner) dnnz[i]++;
1599       else onnz[i]++;
1600     }
1601   }
1602   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1606   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1607 
1608   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1609   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1610   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1611   for (i=0; i<m; i++) {
1612     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1613     PetscInt j0,rowlen;
1614     rowlen = ai[i+1] - ai[i];
1615     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1616       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1617       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1618     }
1619     rowlen = bi[i+1] - bi[i];
1620     for (j0=j=0; j<rowlen; j0=j) {
1621       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1622       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1623     }
1624   }
1625   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1627   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1630   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1631   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1633   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1634   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1635   *B = Aperm;
1636   PetscFunctionReturn(0);
1637 }
1638 
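/*
   A minimal sketch of calling MatPermute() on a parallel AIJ matrix (kept under "#if 0"): the row
   and column permutations are parallel index sets. Here the identity permutation is built with
   ISCreateStride() purely for illustration; a real application would supply its own ordering.
   The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExamplePermute(Mat A,Mat *B)
{
  IS             rowp,colp;
  PetscInt       rstart,rend,cstart,cend;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  /* identity permutation: each process keeps its own rows and columns */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
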
1639 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1640 {
1641   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1646   if (ghosts) *ghosts = aij->garray;
1647   PetscFunctionReturn(0);
1648 }
1649 
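/*
   A minimal sketch of MatGetGhosts() from user code (kept under "#if 0"): the returned array is
   aij->garray, i.e. the global column numbers of the compressed off-diagonal block, and must not
   be freed by the caller. The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleListGhosts(Mat A)
{
  PetscInt       nghosts,i;
  const PetscInt *ghosts;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
  for (i=0; i<nghosts; i++) {
    ierr = PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A),"ghost column %" PetscInt_FMT "\n",ghosts[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
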
1650 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1651 {
1652   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1653   Mat            A    = mat->A,B = mat->B;
1654   PetscErrorCode ierr;
1655   PetscLogDouble isend[5],irecv[5];
1656 
1657   PetscFunctionBegin;
1658   info->block_size = 1.0;
1659   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1660 
1661   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1662   isend[3] = info->memory;  isend[4] = info->mallocs;
1663 
1664   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1665 
1666   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1667   isend[3] += info->memory;  isend[4] += info->mallocs;
1668   if (flag == MAT_LOCAL) {
1669     info->nz_used      = isend[0];
1670     info->nz_allocated = isend[1];
1671     info->nz_unneeded  = isend[2];
1672     info->memory       = isend[3];
1673     info->mallocs      = isend[4];
1674   } else if (flag == MAT_GLOBAL_MAX) {
1675     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1676 
1677     info->nz_used      = irecv[0];
1678     info->nz_allocated = irecv[1];
1679     info->nz_unneeded  = irecv[2];
1680     info->memory       = irecv[3];
1681     info->mallocs      = irecv[4];
1682   } else if (flag == MAT_GLOBAL_SUM) {
1683     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1684 
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   }
1691   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1692   info->fill_ratio_needed = 0;
1693   info->factor_mallocs    = 0;
1694   PetscFunctionReturn(0);
1695 }
1696 
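/*
   A minimal sketch of querying the statistics assembled above (kept under "#if 0"): MAT_LOCAL
   returns the combined counts of the diagonal and off-diagonal blocks on this process, while
   MAT_GLOBAL_SUM / MAT_GLOBAL_MAX reduce them over the communicator. The helper name is a
   placeholder.
*/
#if 0
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  MatInfo        info;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nonzeros used %g, allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
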
1697 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1698 {
1699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   switch (op) {
1704   case MAT_NEW_NONZERO_LOCATIONS:
1705   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1706   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1707   case MAT_KEEP_NONZERO_PATTERN:
1708   case MAT_NEW_NONZERO_LOCATION_ERR:
1709   case MAT_USE_INODES:
1710   case MAT_IGNORE_ZERO_ENTRIES:
1711   case MAT_FORM_EXPLICIT_TRANSPOSE:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_ROW_ORIENTED:
1717     MatCheckPreallocated(A,1);
1718     a->roworiented = flg;
1719 
1720     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1721     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_FORCE_DIAGONAL_ENTRIES:
1724   case MAT_SORTED_FULL:
1725     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1731   case MAT_SPD:
1732   case MAT_SYMMETRIC:
1733   case MAT_STRUCTURALLY_SYMMETRIC:
1734   case MAT_HERMITIAN:
1735   case MAT_SYMMETRY_ETERNAL:
1736     break;
1737   case MAT_SUBMAT_SINGLEIS:
1738     A->submat_singleis = flg;
1739     break;
1740   case MAT_STRUCTURE_ONLY:
1741     /* The option is handled directly by MatSetOption() */
1742     break;
1743   default:
1744     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
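/*
   A minimal sketch of setting options handled by the routine above (kept under "#if 0"):
   MAT_IGNORE_OFF_PROC_ENTRIES is acted on here (it sets a->donotstash), while
   MAT_NEW_NONZERO_ALLOCATION_ERR is forwarded to the sequential blocks a->A and a->B.
   The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleAssemblyOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* drop values destined for other processes instead of stashing and communicating them */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  /* error out if an insertion would require a nonzero beyond the preallocation */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
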
1749 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1753   PetscErrorCode ierr;
1754   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1755   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1756   PetscInt       *cmap,*idx_p;
1757 
1758   PetscFunctionBegin;
1759   PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1760   mat->getrowactive = PETSC_TRUE;
1761 
1762   if (!mat->rowvalues && (idx || v)) {
1763     /*
1764         allocate enough space to hold information from the longest row.
1765     */
1766     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1767     PetscInt   max = 1,tmp;
1768     for (i=0; i<matin->rmap->n; i++) {
1769       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1770       if (max < tmp) max = tmp;
1771     }
1772     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1773   }
1774 
1775   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1776   lrow = row - rstart;
1777 
1778   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1779   if (!v)   {pvA = NULL; pvB = NULL;}
1780   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1781   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1782   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1783   nztot = nzA + nzB;
1784 
1785   cmap = mat->garray;
1786   if (v  || idx) {
1787     if (nztot) {
1788       /* Sort by increasing column numbers, assuming A and B already sorted */
1789       PetscInt imark = -1;
1790       if (v) {
1791         *v = v_p = mat->rowvalues;
1792         for (i=0; i<nzB; i++) {
1793           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1794           else break;
1795         }
1796         imark = i;
1797         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1798         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1799       }
1800       if (idx) {
1801         *idx = idx_p = mat->rowindices;
1802         if (imark > -1) {
1803           for (i=0; i<imark; i++) {
1804             idx_p[i] = cmap[cworkB[i]];
1805           }
1806         } else {
1807           for (i=0; i<nzB; i++) {
1808             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1809             else break;
1810           }
1811           imark = i;
1812         }
1813         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1814         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1815       }
1816     } else {
1817       if (idx) *idx = NULL;
1818       if (v)   *v   = NULL;
1819     }
1820   }
1821   *nz  = nztot;
1822   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1823   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1828 {
1829   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1830 
1831   PetscFunctionBegin;
1832   PetscCheckFalse(!aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1833   aij->getrowactive = PETSC_FALSE;
1834   PetscFunctionReturn(0);
1835 }
1836 
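/*
   A minimal sketch of the MatGetRow()/MatRestoreRow() protocol implemented above (kept under
   "#if 0"): only locally owned rows may be requested, and each MatGetRow() must be paired with a
   MatRestoreRow() before the next row is fetched. The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleRowSums(Mat A,Vec rowsum)
{
  PetscInt          rstart,rend,row,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       sum;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    sum  = 0.0;
    for (j=0; j<ncols; j++) sum += vals[j];
    ierr = VecSetValue(rowsum,row,sum,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(rowsum);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(rowsum);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
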
1837 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1838 {
1839   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1840   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1841   PetscErrorCode  ierr;
1842   PetscInt        i,j,cstart = mat->cmap->rstart;
1843   PetscReal       sum = 0.0;
1844   const MatScalar *v,*amata,*bmata;
1845 
1846   PetscFunctionBegin;
1847   if (aij->size == 1) {
1848     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1849   } else {
1850     ierr = MatSeqAIJGetArrayRead(aij->A,&amata);CHKERRQ(ierr);
1851     ierr = MatSeqAIJGetArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1852     if (type == NORM_FROBENIUS) {
1853       v = amata;
1854       for (i=0; i<amat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       v = bmata;
1858       for (i=0; i<bmat->nz; i++) {
1859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1860       }
1861       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1862       *norm = PetscSqrtReal(*norm);
1863       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1864     } else if (type == NORM_1) { /* max column norm */
1865       PetscReal *tmp,*tmp2;
1866       PetscInt  *jj,*garray = aij->garray;
1867       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1868       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1869       *norm = 0.0;
1870       v     = amata; jj = amat->j;
1871       for (j=0; j<amat->nz; j++) {
1872         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1873       }
1874       v = bmata; jj = bmat->j;
1875       for (j=0; j<bmat->nz; j++) {
1876         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1877       }
1878       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1879       for (j=0; j<mat->cmap->N; j++) {
1880         if (tmp2[j] > *norm) *norm = tmp2[j];
1881       }
1882       ierr = PetscFree(tmp);CHKERRQ(ierr);
1883       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1884       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1885     } else if (type == NORM_INFINITY) { /* max row norm */
1886       PetscReal ntemp = 0.0;
1887       for (j=0; j<aij->A->rmap->n; j++) {
1888         v   = amata + amat->i[j];
1889         sum = 0.0;
1890         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v); v++;
1892         }
1893         v = bmata + bmat->i[j];
1894         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1895           sum += PetscAbsScalar(*v); v++;
1896         }
1897         if (sum > ntemp) ntemp = sum;
1898       }
1899       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1900       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1901     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1902     ierr = MatSeqAIJRestoreArrayRead(aij->A,&amata);CHKERRQ(ierr);
1903     ierr = MatSeqAIJRestoreArrayRead(aij->B,&bmata);CHKERRQ(ierr);
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
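/*
   A minimal sketch of the norms computed above (kept under "#if 0"): NORM_FROBENIUS, NORM_1
   (maximum column sum of |a_ij|) and NORM_INFINITY (maximum row sum of |a_ij|) are supported;
   NORM_2 is not. The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleNorms(Mat A)
{
  PetscReal      fro,one,inf;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"||A||_F %g  ||A||_1 %g  ||A||_inf %g\n",(double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
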
1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1909 {
1910   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1911   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1912   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1913   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1914   PetscErrorCode  ierr;
1915   Mat             B,A_diag,*B_diag;
1916   const MatScalar *pbv,*bv;
1917 
1918   PetscFunctionBegin;
1919   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1920   ai = Aloc->i; aj = Aloc->j;
1921   bi = Bloc->i; bj = Bloc->j;
1922   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1923     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1924     PetscSFNode          *oloc;
1925     PETSC_UNUSED PetscSF sf;
1926 
1927     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1928     /* compute d_nnz for preallocation */
1929     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1930     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1931     /* compute local off-diagonal contributions */
1932     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1933     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1934     /* map those to global */
1935     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1936     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1937     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1938     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1939     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1940     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1941     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1942 
1943     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1944     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1945     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1946     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1947     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1948     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1949   } else {
1950     B    = *matout;
1951     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1952   }
1953 
1954   b           = (Mat_MPIAIJ*)B->data;
1955   A_diag      = a->A;
1956   B_diag      = &b->A;
1957   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1958   A_diag_ncol = A_diag->cmap->N;
1959   B_diag_ilen = sub_B_diag->ilen;
1960   B_diag_i    = sub_B_diag->i;
1961 
1962   /* Set ilen for diagonal of B */
1963   for (i=0; i<A_diag_ncol; i++) {
1964     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1965   }
1966 
1967   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1968   very quickly (i.e., without using MatSetValues()) because all writes are local. */
1969   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1970 
1971   /* copy over the B part */
1972   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1973   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1974   pbv  = bv;
1975   row  = A->rmap->rstart;
1976   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1977   cols_tmp = cols;
1978   for (i=0; i<mb; i++) {
1979     ncol = bi[i+1]-bi[i];
1980     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1981     row++;
1982     pbv += ncol; cols_tmp += ncol;
1983   }
1984   ierr = PetscFree(cols);CHKERRQ(ierr);
1985   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1986 
1987   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1990     *matout = B;
1991   } else {
1992     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
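/*
   A minimal sketch of the MatReuse modes handled above (kept under "#if 0"): MAT_INITIAL_MATRIX
   allocates a new matrix, MAT_REUSE_MATRIX overwrites a previously created transpose with the
   same nonzero pattern, and MAT_INPLACE_MATRIX replaces A itself via MatHeaderMerge(). The helper
   name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A)
{
  Mat            At;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix holding A^T */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  /* replace A by its transpose */
  PetscFunctionReturn(0);
}
#endif
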
1997 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1998 {
1999   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2000   Mat            a    = aij->A,b = aij->B;
2001   PetscErrorCode ierr;
2002   PetscInt       s1,s2,s3;
2003 
2004   PetscFunctionBegin;
2005   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2006   if (rr) {
2007     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2008     PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2009     /* Overlap communication with computation. */
2010     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2011   }
2012   if (ll) {
2013     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2014     PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2015     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2016   }
2017   /* scale the diagonal block */
2018   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2019 
2020   if (rr) {
2021     /* Do a scatter end and then right scale the off-diagonal block */
2022     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
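/*
   A minimal sketch of MatDiagonalScale() as implemented above (kept under "#if 0"): the left
   vector must match the row layout and the right vector the column layout of the matrix; either
   may be NULL. The helper name and the scaling values are placeholders.
*/
#if 0
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  Vec            l,r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr); /* r: column layout, l: row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* A <- diag(l) * A * diag(r) */
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
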
2028 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2029 {
2030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2039 {
2040   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2041   Mat            a,b,c,d;
2042   PetscBool      flg;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   a = matA->A; b = matA->B;
2047   c = matB->A; d = matB->B;
2048 
2049   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2050   if (flg) {
2051     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2052   }
2053   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2058 {
2059   PetscErrorCode ierr;
2060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2061   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2062 
2063   PetscFunctionBegin;
2064   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2065   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2066     /* Because of the column compression in the off-process part of the matrix a->B,
2067        the number of columns in a->B and b->B may differ, so we cannot call
2068        MatCopy() directly on the two parts. If need be, a copy more efficient than
2069        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2070        then copying the submatrices */
2071     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2072   } else {
2073     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2074     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2075   }
2076   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2081 {
2082   PetscErrorCode ierr;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 /*
2090    Computes the number of nonzeros per row needed for preallocation when X and Y
2091    have different nonzero structure.
2092 */
2093 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2094 {
2095   PetscInt       i,j,k,nzx,nzy;
2096 
2097   PetscFunctionBegin;
2098   /* Set the number of nonzeros in the new matrix */
2099   for (i=0; i<m; i++) {
2100     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2101     nzx = xi[i+1] - xi[i];
2102     nzy = yi[i+1] - yi[i];
2103     nnz[i] = 0;
2104     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2105       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2106       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2107       nnz[i]++;
2108     }
2109     for (; k<nzy; k++) nnz[i]++;
2110   }
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2115 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2116 {
2117   PetscErrorCode ierr;
2118   PetscInt       m = Y->rmap->N;
2119   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2120   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2121 
2122   PetscFunctionBegin;
2123   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2128 {
2129   PetscErrorCode ierr;
2130   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2131 
2132   PetscFunctionBegin;
2133   if (str == SAME_NONZERO_PATTERN) {
2134     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2135     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2136   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2137     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2138   } else {
2139     Mat      B;
2140     PetscInt *nnz_d,*nnz_o;
2141 
2142     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2143     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2144     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2145     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2146     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2147     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2148     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2149     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2150     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2151     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2152     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
2153     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2154     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2155   }
2156   PetscFunctionReturn(0);
2157 }
2158 
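/*
   A minimal sketch of MatAXPY() with the three MatStructure cases handled above (kept under
   "#if 0"): SAME_NONZERO_PATTERN works blockwise on the diagonal and off-diagonal parts,
   SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and DIFFERENT_NONZERO_PATTERN builds a
   merged preallocation first. The helper name and the scalar are placeholders.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- Y + 2*X */
  PetscFunctionReturn(0);
}
#endif
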
2159 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2160 
2161 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2162 {
2163 #if defined(PETSC_USE_COMPLEX)
2164   PetscErrorCode ierr;
2165   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2166 
2167   PetscFunctionBegin;
2168   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2169   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2170 #else
2171   PetscFunctionBegin;
2172 #endif
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2177 {
2178   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2183   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2188 {
2189   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2190   PetscErrorCode ierr;
2191 
2192   PetscFunctionBegin;
2193   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2194   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2199 {
2200   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode    ierr;
2202   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2203   PetscScalar       *va,*vv;
2204   Vec               vB,vA;
2205   const PetscScalar *vb;
2206 
2207   PetscFunctionBegin;
2208   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2209   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2210 
2211   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2212   if (idx) {
2213     for (i=0; i<m; i++) {
2214       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2215     }
2216   }
2217 
2218   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2219   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2220   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2221 
2222   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2223   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2224   for (i=0; i<m; i++) {
2225     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2226       vv[i] = vb[i];
2227       if (idx) idx[i] = a->garray[idxb[i]];
2228     } else {
2229       vv[i] = va[i];
2230       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2231         idx[i] = a->garray[idxb[i]];
2232     }
2233   }
2234   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2235   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2236   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2237   ierr = PetscFree(idxb);CHKERRQ(ierr);
2238   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2239   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
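/*
   A minimal sketch of MatGetRowMaxAbs() from user code (kept under "#if 0"): the vector must have
   the row layout of the matrix, and the optional index array (of local row length) receives the
   global column where each row maximum occurs. The helper name is a placeholder.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  Vec            rmax;
  PetscInt       *idx,m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr); /* vector with the row layout of A */
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,rmax,idx);CHKERRQ(ierr); /* rmax[i] = max_j |a_ij|, idx[i] = its global column */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&rmax);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
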
2243 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2244 {
2245   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2246   PetscInt          m = A->rmap->n,n = A->cmap->n;
2247   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2248   PetscInt          *cmap  = mat->garray;
2249   PetscInt          *diagIdx, *offdiagIdx;
2250   Vec               diagV, offdiagV;
2251   PetscScalar       *a, *diagA, *offdiagA;
2252   const PetscScalar *ba,*bav;
2253   PetscInt          r,j,col,ncols,*bi,*bj;
2254   PetscErrorCode    ierr;
2255   Mat               B = mat->B;
2256   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2257 
2258   PetscFunctionBegin;
2259   /* When one process holds the entire matrix A and the other processes have no entries */
2260   if (A->cmap->N == n) {
2261     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2262     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2263     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2264     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2265     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2266     PetscFunctionReturn(0);
2267   } else if (n == 0) {
2268     if (m) {
2269       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2270       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2271       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2272     }
2273     PetscFunctionReturn(0);
2274   }
2275 
2276   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2277   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2278   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2279   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2280 
2281   /* Get offdiagIdx[] for implicit 0.0 */
2282   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2283   ba   = bav;
2284   bi   = b->i;
2285   bj   = b->j;
2286   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2287   for (r = 0; r < m; r++) {
2288     ncols = bi[r+1] - bi[r];
2289     if (ncols == A->cmap->N - n) { /* Brow is dense */
2290       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2291     } else { /* Brow is sparse, so it has an implicit zero entry: the minimum |value| over the B part is 0.0 */
2292       offdiagA[r] = 0.0;
2293 
2294       /* Find first hole in the cmap */
2295       for (j=0; j<ncols; j++) {
2296         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2297         if (col > j && j < cstart) {
2298           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2299           break;
2300         } else if (col > j + n && j >= cstart) {
2301           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2302           break;
2303         }
2304       }
2305       if (j == ncols && ncols < A->cmap->N - n) {
2306         /* a hole is outside compressed Bcols */
2307         if (ncols == 0) {
2308           if (cstart) {
2309             offdiagIdx[r] = 0;
2310           } else offdiagIdx[r] = cend;
2311         } else { /* ncols > 0 */
2312           offdiagIdx[r] = cmap[ncols-1] + 1;
2313           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2314         }
2315       }
2316     }
2317 
2318     for (j=0; j<ncols; j++) {
2319       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2320       ba++; bj++;
2321     }
2322   }
2323 
2324   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2325   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2326   for (r = 0; r < m; ++r) {
2327     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2328       a[r]   = diagA[r];
2329       if (idx) idx[r] = cstart + diagIdx[r];
2330     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2331       a[r] = diagA[r];
2332       if (idx) {
2333         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2334           idx[r] = cstart + diagIdx[r];
2335         } else idx[r] = offdiagIdx[r];
2336       }
2337     } else {
2338       a[r]   = offdiagA[r];
2339       if (idx) idx[r] = offdiagIdx[r];
2340     }
2341   }
2342   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2343   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2344   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2345   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2346   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2347   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2348   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2353 {
2354   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2355   PetscInt          m = A->rmap->n,n = A->cmap->n;
2356   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2357   PetscInt          *cmap  = mat->garray;
2358   PetscInt          *diagIdx, *offdiagIdx;
2359   Vec               diagV, offdiagV;
2360   PetscScalar       *a, *diagA, *offdiagA;
2361   const PetscScalar *ba,*bav;
2362   PetscInt          r,j,col,ncols,*bi,*bj;
2363   PetscErrorCode    ierr;
2364   Mat               B = mat->B;
2365   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2366 
2367   PetscFunctionBegin;
2368   /* When one process holds the entire matrix A and the other processes have no entries */
2369   if (A->cmap->N == n) {
2370     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2371     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2372     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2373     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2374     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2375     PetscFunctionReturn(0);
2376   } else if (n == 0) {
2377     if (m) {
2378       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2379       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2380       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2381     }
2382     PetscFunctionReturn(0);
2383   }
2384 
2385   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2386   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2387   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2388   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2389 
2390   /* Get offdiagIdx[] for implicit 0.0 */
2391   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2392   ba   = bav;
2393   bi   = b->i;
2394   bj   = b->j;
2395   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2396   for (r = 0; r < m; r++) {
2397     ncols = bi[r+1] - bi[r];
2398     if (ncols == A->cmap->N - n) { /* Brow is dense */
2399       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2400     } else { /* Brow is sparse, so it has an implicit zero entry: the minimum over the B part is at most 0.0 */
2401       offdiagA[r] = 0.0;
2402 
2403       /* Find first hole in the cmap */
2404       for (j=0; j<ncols; j++) {
2405         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2406         if (col > j && j < cstart) {
2407           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2408           break;
2409         } else if (col > j + n && j >= cstart) {
2410           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2411           break;
2412         }
2413       }
2414       if (j == ncols && ncols < A->cmap->N - n) {
2415         /* a hole is outside compressed Bcols */
2416         if (ncols == 0) {
2417           if (cstart) {
2418             offdiagIdx[r] = 0;
2419           } else offdiagIdx[r] = cend;
2420         } else { /* ncols > 0 */
2421           offdiagIdx[r] = cmap[ncols-1] + 1;
2422           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2423         }
2424       }
2425     }
2426 
2427     for (j=0; j<ncols; j++) {
2428       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2429       ba++; bj++;
2430     }
2431   }
2432 
2433   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2434   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2435   for (r = 0; r < m; ++r) {
2436     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2437       a[r]   = diagA[r];
2438       if (idx) idx[r] = cstart + diagIdx[r];
2439     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2440       a[r] = diagA[r];
2441       if (idx) {
2442         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2443           idx[r] = cstart + diagIdx[r];
2444         } else idx[r] = offdiagIdx[r];
2445       }
2446     } else {
2447       a[r]   = offdiagA[r];
2448       if (idx) idx[r] = offdiagIdx[r];
2449     }
2450   }
2451   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2452   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2453   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2454   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2455   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2456   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2457   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2462 {
2463   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2464   PetscInt          m = A->rmap->n,n = A->cmap->n;
2465   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2466   PetscInt          *cmap  = mat->garray;
2467   PetscInt          *diagIdx, *offdiagIdx;
2468   Vec               diagV, offdiagV;
2469   PetscScalar       *a, *diagA, *offdiagA;
2470   const PetscScalar *ba,*bav;
2471   PetscInt          r,j,col,ncols,*bi,*bj;
2472   PetscErrorCode    ierr;
2473   Mat               B = mat->B;
2474   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2475 
2476   PetscFunctionBegin;
2477   /* When one process holds the entire matrix A and the other processes have no entries */
2478   if (A->cmap->N == n) {
2479     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2480     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2481     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2482     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2483     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2484     PetscFunctionReturn(0);
2485   } else if (n == 0) {
2486     if (m) {
2487       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2488       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2489       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2490     }
2491     PetscFunctionReturn(0);
2492   }
2493 
2494   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2496   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2497   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2498 
2499   /* Get offdiagIdx[] for implicit 0.0 */
2500   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2501   ba   = bav;
2502   bi   = b->i;
2503   bj   = b->j;
2504   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   for (r = 0; r < m; r++) {
2506     ncols = bi[r+1] - bi[r];
2507     if (ncols == A->cmap->N - n) { /* Brow is dense */
2508       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2509     } else { /* Brow is sparse, so it has an implicit zero entry: the maximum over the B part is at least 0.0 */
2510       offdiagA[r] = 0.0;
2511 
2512       /* Find first hole in the cmap */
2513       for (j=0; j<ncols; j++) {
2514         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2515         if (col > j && j < cstart) {
2516           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2517           break;
2518         } else if (col > j + n && j >= cstart) {
2519           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2520           break;
2521         }
2522       }
2523       if (j == ncols && ncols < A->cmap->N - n) {
2524         /* a hole is outside compressed Bcols */
2525         if (ncols == 0) {
2526           if (cstart) {
2527             offdiagIdx[r] = 0;
2528           } else offdiagIdx[r] = cend;
2529         } else { /* ncols > 0 */
2530           offdiagIdx[r] = cmap[ncols-1] + 1;
2531           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2532         }
2533       }
2534     }
2535 
2536     for (j=0; j<ncols; j++) {
2537       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2538       ba++; bj++;
2539     }
2540   }
2541 
2542   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2543   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2544   for (r = 0; r < m; ++r) {
2545     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2546       a[r] = diagA[r];
2547       if (idx) idx[r] = cstart + diagIdx[r];
2548     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2549       a[r] = diagA[r];
2550       if (idx) {
2551         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2552           idx[r] = cstart + diagIdx[r];
2553         } else idx[r] = offdiagIdx[r];
2554       }
2555     } else {
2556       a[r] = offdiagA[r];
2557       if (idx) idx[r] = offdiagIdx[r];
2558     }
2559   }
2560   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2561   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2562   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2563   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2564   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2565   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2566   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2571 {
2572   PetscErrorCode ierr;
2573   Mat            *dummy;
2574 
2575   PetscFunctionBegin;
2576   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2577   *newmat = *dummy;
2578   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2583 {
2584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2585   PetscErrorCode ierr;
2586 
2587   PetscFunctionBegin;
2588   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2589   A->factorerrortype = a->A->factorerrortype;
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2594 {
2595   PetscErrorCode ierr;
2596   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2597 
2598   PetscFunctionBegin;
2599   PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2600   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2601   if (x->assembled) {
2602     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2603   } else {
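    /* B is not yet assembled, so its entries are indexed by global column; skip this process's diagonal-block
       column range so that the random entries land only in the off-diagonal part */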
2604     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2605   }
2606   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2607   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2612 {
2613   PetscFunctionBegin;
2614   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2615   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 /*@
2620    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2621 
2622    Collective on Mat
2623 
2624    Input Parameters:
2625 +    A - the matrix
2626 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is not to use it)
2627 
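   Sample usage (a minimal sketch; assumes A is an assembled MATMPIAIJ and is[] holds the n index sets whose overlap is to be increased):
.vb
      ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatIncreaseOverlap(A,n,is,2);CHKERRQ(ierr);
.ve
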
2628  Level: advanced
2629 
2630 @*/
2631 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2632 {
2633   PetscErrorCode       ierr;
2634 
2635   PetscFunctionBegin;
2636   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2641 {
2642   PetscErrorCode       ierr;
2643   PetscBool            sc = PETSC_FALSE,flg;
2644 
2645   PetscFunctionBegin;
2646   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2647   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2648   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2649   if (flg) {
2650     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2651   }
2652   ierr = PetscOptionsTail();CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2657 {
2658   PetscErrorCode ierr;
2659   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2660   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2661 
2662   PetscFunctionBegin;
2663   if (!Y->preallocated) {
2664     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2665   } else if (!aij->nz) {
2666     PetscInt nonew = aij->nonew;
2667     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2668     aij->nonew = nonew;
2669   }
2670   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2675 {
2676   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2677   PetscErrorCode ierr;
2678 
2679   PetscFunctionBegin;
2680   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2681   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2682   if (d) {
2683     PetscInt rstart;
2684     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2685     *d += rstart;
2686 
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2692 {
2693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 /* -------------------------------------------------------------------*/
2702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2703                                        MatGetRow_MPIAIJ,
2704                                        MatRestoreRow_MPIAIJ,
2705                                        MatMult_MPIAIJ,
2706                                 /* 4*/ MatMultAdd_MPIAIJ,
2707                                        MatMultTranspose_MPIAIJ,
2708                                        MatMultTransposeAdd_MPIAIJ,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                 /*10*/ NULL,
2713                                        NULL,
2714                                        NULL,
2715                                        MatSOR_MPIAIJ,
2716                                        MatTranspose_MPIAIJ,
2717                                 /*15*/ MatGetInfo_MPIAIJ,
2718                                        MatEqual_MPIAIJ,
2719                                        MatGetDiagonal_MPIAIJ,
2720                                        MatDiagonalScale_MPIAIJ,
2721                                        MatNorm_MPIAIJ,
2722                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2723                                        MatAssemblyEnd_MPIAIJ,
2724                                        MatSetOption_MPIAIJ,
2725                                        MatZeroEntries_MPIAIJ,
2726                                 /*24*/ MatZeroRows_MPIAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                 /*29*/ MatSetUp_MPIAIJ,
2732                                        NULL,
2733                                        NULL,
2734                                        MatGetDiagonalBlock_MPIAIJ,
2735                                        NULL,
2736                                 /*34*/ MatDuplicate_MPIAIJ,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                 /*39*/ MatAXPY_MPIAIJ,
2742                                        MatCreateSubMatrices_MPIAIJ,
2743                                        MatIncreaseOverlap_MPIAIJ,
2744                                        MatGetValues_MPIAIJ,
2745                                        MatCopy_MPIAIJ,
2746                                 /*44*/ MatGetRowMax_MPIAIJ,
2747                                        MatScale_MPIAIJ,
2748                                        MatShift_MPIAIJ,
2749                                        MatDiagonalSet_MPIAIJ,
2750                                        MatZeroRowsColumns_MPIAIJ,
2751                                 /*49*/ MatSetRandom_MPIAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2757                                        NULL,
2758                                        MatSetUnfactored_MPIAIJ,
2759                                        MatPermute_MPIAIJ,
2760                                        NULL,
2761                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2762                                        MatDestroy_MPIAIJ,
2763                                        MatView_MPIAIJ,
2764                                        NULL,
2765                                        NULL,
2766                                 /*64*/ NULL,
2767                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2772                                        MatGetRowMinAbs_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                        NULL,
2776                                        NULL,
2777                                 /*75*/ MatFDColoringApply_AIJ,
2778                                        MatSetFromOptions_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        MatFindZeroDiagonals_MPIAIJ,
2782                                 /*80*/ NULL,
2783                                        NULL,
2784                                        NULL,
2785                                 /*83*/ MatLoad_MPIAIJ,
2786                                        MatIsSymmetric_MPIAIJ,
2787                                        NULL,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                 /*89*/ NULL,
2792                                        NULL,
2793                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2794                                        NULL,
2795                                        NULL,
2796                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        MatBindToCPU_MPIAIJ,
2801                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        MatConjugate_MPIAIJ,
2805                                        NULL,
2806                                 /*104*/MatSetValuesRow_MPIAIJ,
2807                                        MatRealPart_MPIAIJ,
2808                                        MatImaginaryPart_MPIAIJ,
2809                                        NULL,
2810                                        NULL,
2811                                 /*109*/NULL,
2812                                        NULL,
2813                                        MatGetRowMin_MPIAIJ,
2814                                        NULL,
2815                                        MatMissingDiagonal_MPIAIJ,
2816                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2817                                        NULL,
2818                                        MatGetGhosts_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2822                                        NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        MatGetMultiProcBlock_MPIAIJ,
2826                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2827                                        MatGetColumnReductions_MPIAIJ,
2828                                        MatInvertBlockDiagonal_MPIAIJ,
2829                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2830                                        MatCreateSubMatricesMPI_MPIAIJ,
2831                                 /*129*/NULL,
2832                                        NULL,
2833                                        NULL,
2834                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2835                                        NULL,
2836                                 /*134*/NULL,
2837                                        NULL,
2838                                        NULL,
2839                                        NULL,
2840                                        NULL,
2841                                 /*139*/MatSetBlockSizes_MPIAIJ,
2842                                        NULL,
2843                                        NULL,
2844                                        MatFDColoringSetUp_MPIXAIJ,
2845                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2846                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2847                                 /*145*/NULL,
2848                                        NULL,
2849                                        NULL
2850 };
2851 
2852 /* ----------------------------------------------------------------------------------------*/
2853 
2854 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2855 {
2856   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2857   PetscErrorCode ierr;
2858 
2859   PetscFunctionBegin;
2860   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2861   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2866 {
2867   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2868   PetscErrorCode ierr;
2869 
2870   PetscFunctionBegin;
2871   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2872   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2877 {
2878   Mat_MPIAIJ     *b;
2879   PetscErrorCode ierr;
2880   PetscMPIInt    size;
2881 
2882   PetscFunctionBegin;
2883   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2884   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2885   b = (Mat_MPIAIJ*)B->data;
2886 
2887 #if defined(PETSC_USE_CTABLE)
2888   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2889 #else
2890   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2891 #endif
2892   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2893   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2894   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2895 
2896   /* Because B may have been resized, we simply destroy it and create a new one each time */
2897   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2898   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
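  /* Note: with more than one process, b->B is created with the full global column count; during assembly
     its columns are compacted and b->garray is built to map the compacted column indices back to global ones */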
2899   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2900   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2901   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2902   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2903   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2904 
2905   if (!B->preallocated) {
2906     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2907     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2908     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2909     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2910     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2911   }
2912 
2913   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2914   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2915   B->preallocated  = PETSC_TRUE;
2916   B->was_assembled = PETSC_FALSE;
2917   B->assembled     = PETSC_FALSE;
2918   PetscFunctionReturn(0);
2919 }
2920 
2921 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2922 {
2923   Mat_MPIAIJ     *b;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2928   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2930   b = (Mat_MPIAIJ*)B->data;
2931 
2932 #if defined(PETSC_USE_CTABLE)
2933   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2934 #else
2935   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2936 #endif
2937   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2938   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2939   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2940 
2941   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2942   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2943   B->preallocated  = PETSC_TRUE;
2944   B->was_assembled = PETSC_FALSE;
2945   B->assembled = PETSC_FALSE;
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2950 {
2951   Mat            mat;
2952   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2953   PetscErrorCode ierr;
2954 
2955   PetscFunctionBegin;
2956   *newmat = NULL;
2957   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2958   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2959   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2960   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2961   a       = (Mat_MPIAIJ*)mat->data;
2962 
2963   mat->factortype   = matin->factortype;
2964   mat->assembled    = matin->assembled;
2965   mat->insertmode   = NOT_SET_VALUES;
2966   mat->preallocated = matin->preallocated;
2967 
2968   a->size         = oldmat->size;
2969   a->rank         = oldmat->rank;
2970   a->donotstash   = oldmat->donotstash;
2971   a->roworiented  = oldmat->roworiented;
2972   a->rowindices   = NULL;
2973   a->rowvalues    = NULL;
2974   a->getrowactive = PETSC_FALSE;
2975 
2976   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2977   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2978 
2979   if (oldmat->colmap) {
2980 #if defined(PETSC_USE_CTABLE)
2981     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2982 #else
2983     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2984     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2985     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2986 #endif
2987   } else a->colmap = NULL;
2988   if (oldmat->garray) {
2989     PetscInt len;
2990     len  = oldmat->B->cmap->n;
2991     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2992     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2993     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2994   } else a->garray = NULL;
2995 
2996   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2997      in fact, MatDuplicate() only requires the matrix to be preallocated.
2998      This may happen, for example, inside DMCreateMatrix_Shell() */
2999   if (oldmat->lvec) {
3000     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3001     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3002   }
3003   if (oldmat->Mvctx) {
3004     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3005     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3006   }
3007   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3008   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3009   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3010   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3011   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3012   *newmat = mat;
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3017 {
3018   PetscBool      isbinary, ishdf5;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3023   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3024   /* force binary viewer to load .info file if it has not yet done so */
3025   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3026   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3027   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3028   if (isbinary) {
3029     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3030   } else if (ishdf5) {
3031 #if defined(PETSC_HAVE_HDF5)
3032     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3033 #else
3034     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3035 #endif
3036   } else {
3037     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3038   }
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3043 {
3044   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3045   PetscInt       *rowidxs,*colidxs;
3046   PetscScalar    *matvals;
3047   PetscErrorCode ierr;
3048 
3049   PetscFunctionBegin;
3050   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3051 
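  /* Layout of the binary matrix file as read below: a 4-entry header [MAT_FILE_CLASSID, M, N, nz],
     followed by the nonzero count of each row, then all column indices, then all numerical values */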
3052   /* read in matrix header */
3053   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3054   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3055   M  = header[1]; N = header[2]; nz = header[3];
3056   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3057   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3058   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3059 
3060   /* set block sizes from the viewer's .info file */
3061   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3062   /* set global sizes if not set already */
3063   if (mat->rmap->N < 0) mat->rmap->N = M;
3064   if (mat->cmap->N < 0) mat->cmap->N = N;
3065   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3066   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3067 
3068   /* check if the matrix sizes are correct */
3069   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3070   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3071 
3072   /* read in row lengths and build row indices */
3073   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3074   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3075   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
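  /* rowidxs[i+1] now holds the number of nonzeros in local row i; the running sum below turns these
     row lengths into CSR row offsets */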
3076   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3077   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3078   PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3079   /* read in column indices and matrix values */
3080   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3081   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3082   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3083   /* store matrix indices and values */
3084   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3085   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3086   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 /* Not scalable because of ISAllGather() unless getting all columns. */
3091 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3092 {
3093   PetscErrorCode ierr;
3094   IS             iscol_local;
3095   PetscBool      isstride;
3096   PetscMPIInt    lisstride=0,gisstride;
3097 
3098   PetscFunctionBegin;
3099   /* check if we are grabbing all columns */
3100   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3101 
3102   if (isstride) {
3103     PetscInt  start,len,mstart,mlen;
3104     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3105     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3106     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3107     if (mstart == start && mlen-mstart == len) lisstride = 1;
3108   }
3109 
3110   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3111   if (gisstride) {
3112     PetscInt N;
3113     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3114     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3115     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3116     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3117   } else {
3118     PetscInt cbs;
3119     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3120     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3121     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3122   }
3123 
3124   *isseq = iscol_local;
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 /*
3129  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3130  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3131 
3132  Input Parameters:
3133    mat - matrix
3134    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3135            i.e., mat->rstart <= isrow[i] < mat->rend
3136    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3137            i.e., mat->cstart <= iscol[i] < mat->cend
3138  Output Parameters:
3139    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3140    iscol_o - sequential column index set for retrieving mat->B
3141    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
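
 Developer Note: MatCreateSubMatrix_MPIAIJ_SameRowColDist() below illustrates the intended calling sequence for this helper.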
3142  */
3143 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3144 {
3145   PetscErrorCode ierr;
3146   Vec            x,cmap;
3147   const PetscInt *is_idx;
3148   PetscScalar    *xarray,*cmaparray;
3149   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3150   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3151   Mat            B=a->B;
3152   Vec            lvec=a->lvec,lcmap;
3153   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3154   MPI_Comm       comm;
3155   VecScatter     Mvctx=a->Mvctx;
3156 
3157   PetscFunctionBegin;
3158   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3159   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3160 
3161   /* (1) iscol selects a subset of the columns of mat; encode it in a full-length column vector x, padded with -1.0 for columns not selected */
3162   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3163   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3164   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3165   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3166 
3167   /* Get start indices */
3168   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3169   isstart -= ncols;
3170   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3171 
3172   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3173   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3174   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3175   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3176   for (i=0; i<ncols; i++) {
3177     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3178     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3179     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3180   }
3181   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3182   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3183   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3184 
3185   /* Get iscol_d */
3186   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3187   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3188   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3189 
3190   /* Get isrow_d */
3191   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3192   rstart = mat->rmap->rstart;
3193   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3194   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3195   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3196   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3197 
3198   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3199   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3200   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3201 
3202   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3203   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3204   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3205 
3206   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3207 
3208   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210 
3211   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3212   /* off-process column indices */
3213   count = 0;
3214   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3215   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3216 
3217   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3218   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3219   for (i=0; i<Bn; i++) {
3220     if (PetscRealPart(xarray[i]) > -1.0) {
3221       idx[count]     = i;                   /* local column index in off-diagonal part B */
3222       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3223       count++;
3224     }
3225   }
3226   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3227   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3228 
3229   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3230   /* cannot ensure iscol_o has same blocksize as iscol! */
3231 
3232   ierr = PetscFree(idx);CHKERRQ(ierr);
3233   *garray = cmap1;
3234 
3235   ierr = VecDestroy(&x);CHKERRQ(ierr);
3236   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3237   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3242 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3243 {
3244   PetscErrorCode ierr;
3245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3246   Mat            M = NULL;
3247   MPI_Comm       comm;
3248   IS             iscol_d,isrow_d,iscol_o;
3249   Mat            Asub = NULL,Bsub = NULL;
3250   PetscInt       n;
3251 
3252   PetscFunctionBegin;
3253   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3254 
3255   if (call == MAT_REUSE_MATRIX) {
3256     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3257     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3258     PetscCheckFalse(!isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3259 
3260     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3261     PetscCheckFalse(!iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3262 
3263     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3264     PetscCheckFalse(!iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3265 
3266     /* Update diagonal and off-diagonal portions of submat */
3267     asub = (Mat_MPIAIJ*)(*submat)->data;
3268     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3269     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3270     if (n) {
3271       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3272     }
3273     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3274     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275 
3276   } else { /* call == MAT_INITIAL_MATRIX */
3277     const PetscInt *garray;
3278     PetscInt        BsubN;
3279 
3280     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3281     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3282 
3283     /* Create local submatrices Asub and Bsub */
3284     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3285     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3286 
3287     /* Create submatrix M */
3288     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3289 
3290     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3291     asub = (Mat_MPIAIJ*)M->data;
3292 
3293     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3294     n = asub->B->cmap->N;
3295     if (BsubN > n) {
3296       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3297       const PetscInt *idx;
3298       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3299       ierr = PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3300 
3301       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3302       j = 0;
3303       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3304       for (i=0; i<n; i++) {
3305         if (j >= BsubN) break;
3306         while (subgarray[i] > garray[j]) j++;
3307 
3308         if (subgarray[i] == garray[j]) {
3309           idx_new[i] = idx[j++];
3310         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3311       }
3312       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3313 
3314       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3315       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3316 
3317     } else if (BsubN < n) {
3318       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3319     }
3320 
3321     ierr = PetscFree(garray);CHKERRQ(ierr);
3322     *submat = M;
3323 
3324     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3325     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3326     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3327 
3328     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3329     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3330 
3331     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3332     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3333   }
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3338 {
3339   PetscErrorCode ierr;
3340   IS             iscol_local=NULL,isrow_d;
3341   PetscInt       csize;
3342   PetscInt       n,i,j,start,end;
3343   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3344   MPI_Comm       comm;
3345 
3346   PetscFunctionBegin;
3347   /* If isrow has same processor distribution as mat,
3348      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3349   if (call == MAT_REUSE_MATRIX) {
3350     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3351     if (isrow_d) {
3352       sameRowDist  = PETSC_TRUE;
3353       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3354     } else {
3355       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3356       if (iscol_local) {
3357         sameRowDist  = PETSC_TRUE;
3358         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3359       }
3360     }
3361   } else {
3362     /* Check if isrow has same processor distribution as mat */
3363     sameDist[0] = PETSC_FALSE;
3364     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3365     if (!n) {
3366       sameDist[0] = PETSC_TRUE;
3367     } else {
3368       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3369       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3370       if (i >= start && j < end) {
3371         sameDist[0] = PETSC_TRUE;
3372       }
3373     }
3374 
3375     /* Check if iscol has same processor distribution as mat */
3376     sameDist[1] = PETSC_FALSE;
3377     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3378     if (!n) {
3379       sameDist[1] = PETSC_TRUE;
3380     } else {
3381       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3382       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3383       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3384     }
3385 
3386     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3387     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3388     sameRowDist = tsameDist[0];
3389   }
3390 
3391   if (sameRowDist) {
3392     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3393       /* isrow and iscol have same processor distribution as mat */
3394       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3395       PetscFunctionReturn(0);
3396     } else { /* sameRowDist */
3397       /* isrow has same processor distribution as mat */
3398       if (call == MAT_INITIAL_MATRIX) {
3399         PetscBool sorted;
3400         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3401         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3402         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3403         PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);
3404 
3405         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3406         if (sorted) {
3407           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3408           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3409           PetscFunctionReturn(0);
3410         }
3411       } else { /* call == MAT_REUSE_MATRIX */
3412         IS iscol_sub;
3413         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3414         if (iscol_sub) {
3415           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3416           PetscFunctionReturn(0);
3417         }
3418       }
3419     }
3420   }
3421 
3422   /* General case: iscol -> iscol_local which has global size of iscol */
3423   if (call == MAT_REUSE_MATRIX) {
3424     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3425     PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3426   } else {
3427     if (!iscol_local) {
3428       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3429     }
3430   }
3431 
3432   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3433   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3434 
3435   if (call == MAT_INITIAL_MATRIX) {
3436     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3437     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3438   }
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 /*@C
3443      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3444          and "off-diagonal" parts of the matrix in CSR format.
3445 
3446    Collective
3447 
3448    Input Parameters:
3449 +  comm - MPI communicator
3450 .  A - "diagonal" portion of matrix
3451 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3452 -  garray - global index of B columns
3453 
3454    Output Parameter:
3455 .   mat - the matrix, with input A as its local diagonal matrix
3456    Level: advanced
3457 
3458    Notes:
3459        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3460        A becomes part of the output mat and B is destroyed by this routine; the user must not use A or B afterwards.
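
       A typical calling sequence (a sketch; Asub, Bsub and garray here are assumed to have been produced as in
       MatCreateSubMatrix_MPIAIJ_SameRowColDist(), by calling MatCreateSubMatrix_SeqAIJ() on the diagonal and
       off-diagonal blocks of an existing MATMPIAIJ and obtaining garray from ISGetSeqIS_SameColDist_Private()):
.vb
      Mat Asub,Bsub,M;
      ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
.ve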
3461 
3462 .seealso: MatCreateMPIAIJWithSplitArrays()
3463 @*/
3464 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3465 {
3466   PetscErrorCode    ierr;
3467   Mat_MPIAIJ        *maij;
3468   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3469   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3470   const PetscScalar *oa;
3471   Mat               Bnew;
3472   PetscInt          m,n,N;
3473 
3474   PetscFunctionBegin;
3475   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3476   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3477   PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
3478   PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
3479   /* The check below is disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its column blocksize may not be the same as A's */
3480   /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3481 
3482   /* Get global columns of mat */
3483   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3484 
3485   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3486   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3487   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3488   maij = (Mat_MPIAIJ*)(*mat)->data;
3489 
3490   (*mat)->preallocated = PETSC_TRUE;
3491 
3492   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3493   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3494 
3495   /* Set A as diagonal portion of *mat */
3496   maij->A = A;
3497 
3498   nz = oi[m];
3499   for (i=0; i<nz; i++) {
3500     col   = oj[i];
3501     oj[i] = garray[col];
3502   }
3503 
3504   /* Set Bnew as off-diagonal portion of *mat */
3505   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3506   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3507   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3508   bnew        = (Mat_SeqAIJ*)Bnew->data;
3509   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3510   maij->B     = Bnew;
3511 
3512   PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);
3513 
3514   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3515   b->free_a       = PETSC_FALSE;
3516   b->free_ij      = PETSC_FALSE;
3517   ierr = MatDestroy(&B);CHKERRQ(ierr);
3518 
3519   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3520   bnew->free_a       = PETSC_TRUE;
3521   bnew->free_ij      = PETSC_TRUE;
3522 
3523   /* condense columns of maij->B */
3524   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3525   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3526   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3527   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3528   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3529   PetscFunctionReturn(0);
3530 }
3531 
3532 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3533 
3534 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3535 {
3536   PetscErrorCode ierr;
3537   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3538   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3539   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3540   Mat            M,Msub,B=a->B;
3541   MatScalar      *aa;
3542   Mat_SeqAIJ     *aij;
3543   PetscInt       *garray = a->garray,*colsub,Ncols;
3544   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3545   IS             iscol_sub,iscmap;
3546   const PetscInt *is_idx,*cmap;
3547   PetscBool      allcolumns=PETSC_FALSE;
3548   MPI_Comm       comm;
3549 
3550   PetscFunctionBegin;
3551   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3552   if (call == MAT_REUSE_MATRIX) {
3553     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3554     PetscCheckFalse(!iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3555     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3556 
3557     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3558     PetscCheckFalse(!iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3559 
3560     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3561     PetscCheckFalse(!Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3562 
3563     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3564 
3565   } else { /* call == MAT_INITIAL_MATRIX */
3566     PetscBool flg;
3567 
3568     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3569     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3570 
3571     /* (1) iscol -> nonscalable iscol_local */
3572     /* Check for special case: each processor gets entire matrix columns */
3573     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3574     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3575     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3576     if (allcolumns) {
3577       iscol_sub = iscol_local;
3578       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3579       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3580 
3581     } else {
3582       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3583       PetscInt *idx,*cmap1,k;
3584       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3585       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3586       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3587       count = 0;
3588       k     = 0;
3589       for (i=0; i<Ncols; i++) {
3590         j = is_idx[i];
3591         if (j >= cstart && j < cend) {
3592           /* diagonal part of mat */
3593           idx[count]     = j;
3594           cmap1[count++] = i; /* column index in submat */
3595         } else if (Bn) {
3596           /* off-diagonal part of mat */
3597           if (j == garray[k]) {
3598             idx[count]     = j;
3599             cmap1[count++] = i;  /* column index in submat */
3600           } else if (j > garray[k]) {
3601             while (j > garray[k] && k < Bn-1) k++;
3602             if (j == garray[k]) {
3603               idx[count]     = j;
3604               cmap1[count++] = i; /* column index in submat */
3605             }
3606           }
3607         }
3608       }
3609       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3610 
3611       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3612       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3613       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3614 
3615       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3616     }
3617 
3618     /* (3) Create sequential Msub */
3619     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3620   }
3621 
3622   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3623   aij  = (Mat_SeqAIJ*)(Msub)->data;
3624   ii   = aij->i;
3625   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3626 
3627   /*
3628       m - number of local rows
3629       Ncols - number of columns (same on all processors)
3630       rstart - first row in new global matrix generated
3631   */
3632   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3633 
3634   if (call == MAT_INITIAL_MATRIX) {
3635     /* (4) Create parallel newmat */
3636     PetscMPIInt    rank,size;
3637     PetscInt       csize;
3638 
3639     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3640     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3641 
3642     /*
3643         Determine the number of non-zeros in the diagonal and off-diagonal
3644         portions of the matrix in order to do correct preallocation
3645     */
3646 
3647     /* first get start and end of "diagonal" columns */
3648     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3649     if (csize == PETSC_DECIDE) {
3650       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3651       if (mglobal == Ncols) { /* square matrix */
3652         nlocal = m;
3653       } else {
3654         nlocal = Ncols/size + ((Ncols % size) > rank);
3655       }
3656     } else {
3657       nlocal = csize;
3658     }
3659     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3660     rstart = rend - nlocal;
3661     PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);
3662 
3663     /* next, compute all the lengths */
3664     jj    = aij->j;
3665     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3666     olens = dlens + m;
3667     for (i=0; i<m; i++) {
3668       jend = ii[i+1] - ii[i];
3669       olen = 0;
3670       dlen = 0;
3671       for (j=0; j<jend; j++) {
3672         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3673         else dlen++;
3674         jj++;
3675       }
3676       olens[i] = olen;
3677       dlens[i] = dlen;
3678     }
3679 
3680     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3681     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3682 
3683     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3684     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3685     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3686     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3687     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3688     ierr = PetscFree(dlens);CHKERRQ(ierr);
3689 
3690   } else { /* call == MAT_REUSE_MATRIX */
3691     M    = *newmat;
3692     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3693     PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3694     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3695     /*
3696          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3697        rather than the slower MatSetValues().
3698     */
3699     M->was_assembled = PETSC_TRUE;
3700     M->assembled     = PETSC_FALSE;
3701   }
3702 
3703   /* (5) Set values of Msub to *newmat */
3704   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3705   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3706 
3707   jj   = aij->j;
3708   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3709   for (i=0; i<m; i++) {
3710     row = rstart + i;
3711     nz  = ii[i+1] - ii[i];
3712     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3713     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3714     jj += nz; aa += nz;
3715   }
3716   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3717   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3718 
3719   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3720   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721 
3722   ierr = PetscFree(colsub);CHKERRQ(ierr);
3723 
3724   /* save Msub, iscol_sub and iscmap used in processor for next request */
3725   if (call == MAT_INITIAL_MATRIX) {
3726     *newmat = M;
3727     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3728     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3729 
3730     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3731     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3732 
3733     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3734     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3735 
3736     if (iscol_local) {
3737       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3738       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3739     }
3740   }
3741   PetscFunctionReturn(0);
3742 }
3743 
3744 /*
3745     Not great since it makes two copies of the submatrix: first a SeqAIJ
3746   matrix locally, and then the end result formed by concatenating the local matrices.
3747   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3748 
3749   Note: This requires a sequential iscol containing all of the requested column indices.
3750 */
3751 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3752 {
3753   PetscErrorCode ierr;
3754   PetscMPIInt    rank,size;
3755   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3756   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3757   Mat            M,Mreuse;
3758   MatScalar      *aa,*vwork;
3759   MPI_Comm       comm;
3760   Mat_SeqAIJ     *aij;
3761   PetscBool      colflag,allcolumns=PETSC_FALSE;
3762 
3763   PetscFunctionBegin;
3764   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3765   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3766   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3767 
3768   /* Check for special case: each processor gets entire matrix columns */
3769   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3770   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3771   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3772   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3773 
3774   if (call ==  MAT_REUSE_MATRIX) {
3775     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3776     PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3777     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3778   } else {
3779     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3780   }
3781 
3782   /*
3783       m - number of local rows
3784       n - number of columns (same on all processors)
3785       rstart - first row in new global matrix generated
3786   */
3787   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3788   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3789   if (call == MAT_INITIAL_MATRIX) {
3790     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3791     ii  = aij->i;
3792     jj  = aij->j;
3793 
3794     /*
3795         Determine the number of non-zeros in the diagonal and off-diagonal
3796         portions of the matrix in order to do correct preallocation
3797     */
3798 
3799     /* first get start and end of "diagonal" columns */
3800     if (csize == PETSC_DECIDE) {
3801       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3802       if (mglobal == n) { /* square matrix */
3803         nlocal = m;
3804       } else {
3805         nlocal = n/size + ((n % size) > rank);
3806       }
3807     } else {
3808       nlocal = csize;
3809     }
3810     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3811     rstart = rend - nlocal;
3812     PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
3813 
3814     /* next, compute all the lengths */
3815     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3816     olens = dlens + m;
3817     for (i=0; i<m; i++) {
3818       jend = ii[i+1] - ii[i];
3819       olen = 0;
3820       dlen = 0;
3821       for (j=0; j<jend; j++) {
3822         if (*jj < rstart || *jj >= rend) olen++;
3823         else dlen++;
3824         jj++;
3825       }
3826       olens[i] = olen;
3827       dlens[i] = dlen;
3828     }
3829     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3830     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3831     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3832     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3833     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3834     ierr = PetscFree(dlens);CHKERRQ(ierr);
3835   } else {
3836     PetscInt ml,nl;
3837 
3838     M    = *newmat;
3839     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3840     PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3841     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3842     /*
3843          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3844        rather than the slower MatSetValues().
3845     */
3846     M->was_assembled = PETSC_TRUE;
3847     M->assembled     = PETSC_FALSE;
3848   }
3849   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3850   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3851   ii   = aij->i;
3852   jj   = aij->j;
3853 
3854   /* trigger copy to CPU if needed */
3855   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3856   for (i=0; i<m; i++) {
3857     row   = rstart + i;
3858     nz    = ii[i+1] - ii[i];
3859     cwork = jj; jj += nz;
3860     vwork = aa; aa += nz;
3861     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3862   }
3863   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3864 
3865   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   *newmat = M;
3868 
3869   /* save submatrix used in processor for next request */
3870   if (call ==  MAT_INITIAL_MATRIX) {
3871     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3872     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3873   }
3874   PetscFunctionReturn(0);
3875 }
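/*
   Illustrative sketch (not part of the library source): how a caller might reach the
   submatrix routines above through the public MatCreateSubMatrix() interface. The index
   sets and sizes below are hypothetical placeholders.

     Mat            A,sub;
     IS             isrow,iscol;
     PetscInt       rstart,rend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... use sub; on a later call with the same layout one can pass MAT_REUSE_MATRIX ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/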
3876 
3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3878 {
3879   PetscInt       m,cstart, cend,j,nnz,i,d;
3880   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3881   const PetscInt *JJ;
3882   PetscErrorCode ierr;
3883   PetscBool      nooffprocentries;
3884 
3885   PetscFunctionBegin;
3886   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);
3887 
3888   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3889   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3890   m      = B->rmap->n;
3891   cstart = B->cmap->rstart;
3892   cend   = B->cmap->rend;
3893   rstart = B->rmap->rstart;
3894 
3895   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3896 
3897   if (PetscDefined(USE_DEBUG)) {
3898     for (i=0; i<m; i++) {
3899       nnz = Ii[i+1]- Ii[i];
3900       JJ  = J + Ii[i];
3901       PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
3902       PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
3903       PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
3904     }
3905   }
3906 
3907   for (i=0; i<m; i++) {
3908     nnz     = Ii[i+1]- Ii[i];
3909     JJ      = J + Ii[i];
3910     nnz_max = PetscMax(nnz_max,nnz);
3911     d       = 0;
3912     for (j=0; j<nnz; j++) {
3913       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3914     }
3915     d_nnz[i] = d;
3916     o_nnz[i] = nnz - d;
3917   }
3918   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3919   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3920 
3921   for (i=0; i<m; i++) {
3922     ii   = i + rstart;
3923     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3924   }
3925   nooffprocentries    = B->nooffprocentries;
3926   B->nooffprocentries = PETSC_TRUE;
3927   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3928   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   B->nooffprocentries = nooffprocentries;
3930 
3931   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 /*@
3936    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3937    (the default parallel PETSc format).
3938 
3939    Collective
3940 
3941    Input Parameters:
3942 +  B - the matrix
3943 .  i - the indices into j for the start of each local row (starts with zero)
3944 .  j - the column indices for each local row (starts with zero)
3945 -  v - optional values in the matrix
3946 
3947    Level: developer
3948 
3949    Notes:
3950        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3951      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3952      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3953 
3954        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3955 
3956        The format used for the sparse matrix input is equivalent to a
3957     row-major ordering, i.e. for the following matrix, the input data expected is
3958     as shown:
3959 
3960 $        1 0 0
3961 $        2 0 3     P0
3962 $       -------
3963 $        4 5 6     P1
3964 $
3965 $     Process0 [P0]: rows_owned=[0,1]
3966 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3967 $        j =  {0,0,2}  [size = 3]
3968 $        v =  {1,2,3}  [size = 3]
3969 $
3970 $     Process1 [P1]: rows_owned=[2]
3971 $        i =  {0,3}    [size = nrow+1  = 1+1]
3972 $        j =  {0,1,2}  [size = 3]
3973 $        v =  {4,5,6}  [size = 3]
3974 
3975 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3976           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3977 @*/
3978 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3979 {
3980   PetscErrorCode ierr;
3981 
3982   PetscFunctionBegin;
3983   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3984   PetscFunctionReturn(0);
3985 }
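/*
   A minimal usage sketch (illustrative only) of MatMPIAIJSetPreallocationCSR(), following
   the two-process 3x3 example in the manual page above; it assumes exactly two MPI ranks
   and the variable names are hypothetical.

     Mat            B;
     PetscMPIInt    rank;
     PetscErrorCode ierr;
     CSR data for rank 0 (rows 0-1) and rank 1 (row 2)
     PetscInt       i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar    v0[] = {1,2,3};
     PetscInt       i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar    v1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     if (rank == 0) { ierr = MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);CHKERRQ(ierr); }
     else           { ierr = MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);CHKERRQ(ierr); }
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/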
3986 
3987 /*@C
3988    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3989    (the default parallel PETSc format).  For good matrix assembly performance
3990    the user should preallocate the matrix storage by setting the parameters
3991    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3992    performance can be increased by more than a factor of 50.
3993 
3994    Collective
3995 
3996    Input Parameters:
3997 +  B - the matrix
3998 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3999            (same value is used for all local rows)
4000 .  d_nnz - array containing the number of nonzeros in the various rows of the
4001            DIAGONAL portion of the local submatrix (possibly different for each row)
4002            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4003            The size of this array is equal to the number of local rows, i.e 'm'.
4004            For matrices that will be factored, you must leave room for (and set)
4005            the diagonal entry even if it is zero.
4006 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4007            submatrix (same value is used for all local rows).
4008 -  o_nnz - array containing the number of nonzeros in the various rows of the
4009            OFF-DIAGONAL portion of the local submatrix (possibly different for
4010            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4011            structure. The size of this array is equal to the number
4012            of local rows, i.e 'm'.
4013 
4014    If the *_nnz parameter is given then the *_nz parameter is ignored
4015 
4016    The AIJ format (also called the Yale sparse matrix format or
4017    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4018    storage.  The stored row and column indices begin with zero.
4019    See Users-Manual: ch_mat for details.
4020 
4021    The parallel matrix is partitioned such that the first m0 rows belong to
4022    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4023    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4024 
4025    The DIAGONAL portion of the local submatrix of a processor can be defined
4026    as the submatrix which is obtained by extracting the part corresponding to
4027    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4028    first row that belongs to the processor, r2 is the last row belonging to
4029    this processor, and c1-c2 is the range of indices of the local part of a
4030    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4031    common case of a square matrix, the row and column ranges are the same and
4032    the DIAGONAL part is also square. The remaining portion of the local
4033    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4034 
4035    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4036 
4037    You can call MatGetInfo() to get information on how effective the preallocation was;
4038    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4039    You can also run with the option -info and look for messages with the string
4040    malloc in them to see if additional memory allocation was needed.
4041 
4042    Example usage:
4043 
4044    Consider the following 8x8 matrix with 34 non-zero values, that is
4045    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4046    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4047    as follows:
4048 
4049 .vb
4050             1  2  0  |  0  3  0  |  0  4
4051     Proc0   0  5  6  |  7  0  0  |  8  0
4052             9  0 10  | 11  0  0  | 12  0
4053     -------------------------------------
4054            13  0 14  | 15 16 17  |  0  0
4055     Proc1   0 18  0  | 19 20 21  |  0  0
4056             0  0  0  | 22 23  0  | 24  0
4057     -------------------------------------
4058     Proc2  25 26 27  |  0  0 28  | 29  0
4059            30  0  0  | 31 32 33  |  0 34
4060 .ve
4061 
4062    This can be represented as a collection of submatrices as:
4063 
4064 .vb
4065       A B C
4066       D E F
4067       G H I
4068 .ve
4069 
4070    Where the submatrices A,B,C are owned by proc0, D,E,F are
4071    owned by proc1, G,H,I are owned by proc2.
4072 
4073    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4075    The 'M','N' parameters are 8,8, and have the same values on all procs.
4076 
4077    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4078    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4079    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4080    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4081    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4082    matrix, and [DF] as another SeqAIJ matrix.
4083 
4084    When d_nz, o_nz parameters are specified, d_nz storage elements are
4085    allocated for every row of the local diagonal submatrix, and o_nz
4086    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4087    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4088    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4089    In this case, the values of d_nz,o_nz are:
4090 .vb
4091      proc0 : dnz = 2, o_nz = 2
4092      proc1 : dnz = 3, o_nz = 2
4093      proc2 : dnz = 1, o_nz = 4
4094 .ve
4095    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4096    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4097    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4098    34 values.
4099 
4100    When d_nnz, o_nnz parameters are specified, the storage is specified
4101    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4102    In the above case the values for d_nnz,o_nnz are:
4103 .vb
4104      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4105      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4106      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4107 .ve
4108    Here the space allocated is the sum of all the above values, i.e. 34, and
4109    hence pre-allocation is perfect.
4110 
4111    Level: intermediate
4112 
4113 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4114           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4115 @*/
4116 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4117 {
4118   PetscErrorCode ierr;
4119 
4120   PetscFunctionBegin;
4121   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4122   PetscValidType(B,1);
4123   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4124   PetscFunctionReturn(0);
4125 }
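/*
   A minimal sketch (illustrative, not part of the source) of the recommended assembly
   workflow using MatMPIAIJSetPreallocation(). The local size and the per-row nonzero
   estimates (3 diagonal-block, 2 off-diagonal-block) are hypothetical placeholders.

     Mat            A;
     PetscInt       m = 100,rstart,rend,row,col;
     PetscScalar    value = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       col  = row;
       ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/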
4126 
4127 /*@
4128      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4129          in standard CSR format.
4130 
4131    Collective
4132 
4133    Input Parameters:
4134 +  comm - MPI communicator
4135 .  m - number of local rows (Cannot be PETSC_DECIDE)
4136 .  n - This value should be the same as the local size used in creating the
4137        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4138        calculated if N is given) For square matrices n is almost always m.
4139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4141 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4142 .   j - column indices
4143 -   a - matrix values
4144 
4145    Output Parameter:
4146 .   mat - the matrix
4147 
4148    Level: intermediate
4149 
4150    Notes:
4151        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4152      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4153      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4154 
4155        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4156 
4157        The format used for the sparse matrix input is equivalent to a
4158     row-major ordering, i.e. for the following matrix, the input data expected is
4159     as shown below.
4160 
4161        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4162 
4163 $        1 0 0
4164 $        2 0 3     P0
4165 $       -------
4166 $        4 5 6     P1
4167 $
4168 $     Process0 [P0]: rows_owned=[0,1]
4169 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4170 $        j =  {0,0,2}  [size = 3]
4171 $        v =  {1,2,3}  [size = 3]
4172 $
4173 $     Process1 [P1]: rows_owned=[2]
4174 $        i =  {0,3}    [size = nrow+1  = 1+1]
4175 $        j =  {0,1,2}  [size = 3]
4176 $        v =  {4,5,6}  [size = 3]
4177 
4178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4179           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4180 @*/
4181 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4182 {
4183   PetscErrorCode ierr;
4184 
4185   PetscFunctionBegin;
4186   PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4187   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4188   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4189   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4190   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4191   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4192   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4193   PetscFunctionReturn(0);
4194 }
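/*
   A minimal sketch (illustrative only) of MatCreateMPIAIJWithArrays() using the
   two-process CSR data shown in the manual page above; it assumes exactly two MPI ranks.

     Mat            A;
     PetscMPIInt    rank;
     PetscErrorCode ierr;
     PetscInt       i0[] = {0,1,3}, j0[] = {0,0,2};
     PetscScalar    a0[] = {1,2,3};
     PetscInt       i1[] = {0,3},   j1[] = {0,1,2};
     PetscScalar    a1[] = {4,5,6};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     if (rank == 0) { ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);CHKERRQ(ierr); }
     else           { ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,a1,&A);CHKERRQ(ierr); }
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/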
4195 
4196 /*@
4197      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
4198          in standard CSR format. Only the numerical values are updated; the i and j arrays must be identical to those used previously.
4199 
4200    Collective
4201 
4202    Input Parameters:
4203 +  mat - the matrix
4204 .  m - number of local rows (Cannot be PETSC_DECIDE)
4205 .  n - This value should be the same as the local size used in creating the
4206        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4207        calculated if N is given) For square matrices n is almost always m.
4208 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4209 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4210 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4211 .  J - column indices
4212 -  v - matrix values
4213 
4214    Level: intermediate
4215 
4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4217           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4218 @*/
4219 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4220 {
4221   PetscErrorCode ierr;
4222   PetscInt       cstart,nnz,i,j;
4223   PetscInt       *ld;
4224   PetscBool      nooffprocentries;
4225   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4226   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4227   PetscScalar    *ad,*ao;
4228   const PetscInt *Adi = Ad->i;
4229   PetscInt       ldi,Iii,md;
4230 
4231   PetscFunctionBegin;
4232   PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4233   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4234   PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4235   PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4236 
4237   ierr = MatSeqAIJGetArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4238   ierr = MatSeqAIJGetArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4239   cstart = mat->cmap->rstart;
4240   if (!Aij->ld) {
4241     /* count number of entries below block diagonal */
4242     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4243     Aij->ld = ld;
4244     for (i=0; i<m; i++) {
4245       nnz  = Ii[i+1]- Ii[i];
4246       j     = 0;
4247       while (j < nnz && J[j] < cstart) j++; /* check j < nnz before reading J[j] to avoid reading past the end of the row */
4248       J    += nnz;
4249       ld[i] = j;
4250     }
4251   } else {
4252     ld = Aij->ld;
4253   }
4254 
4255   for (i=0; i<m; i++) {
4256     nnz  = Ii[i+1]- Ii[i];
4257     Iii  = Ii[i];
4258     ldi  = ld[i];
4259     md   = Adi[i+1]-Adi[i];
4260     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4261     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4262     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4263     ad  += md;
4264     ao  += nnz - md;
4265   }
4266   nooffprocentries      = mat->nooffprocentries;
4267   mat->nooffprocentries = PETSC_TRUE;
4268   ierr = MatSeqAIJRestoreArrayWrite(Aij->A,&ad);CHKERRQ(ierr);
4269   ierr = MatSeqAIJRestoreArrayWrite(Aij->B,&ao);CHKERRQ(ierr);
4270   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4271   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4272   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4273   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4274   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4275   mat->nooffprocentries = nooffprocentries;
4276   PetscFunctionReturn(0);
4277 }
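/*
   A minimal sketch (illustrative only): after a matrix has been created with
   MatCreateMPIAIJWithArrays(), only the numerical values change between iterations,
   so they can be refreshed in place. The arrays below are hypothetical and must have
   the same i/j structure as those used at creation.

     ... A created earlier with MatCreateMPIAIJWithArrays(comm,m,n,M,N,i,j,a,&A) ...
     PetscErrorCode ierr;
     recompute the entries of a[] (same sparsity pattern), then
     ierr = MatUpdateMPIAIJWithArrays(A,m,n,M,N,i,j,a);CHKERRQ(ierr);
*/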
4278 
4279 /*@C
4280    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4281    (the default parallel PETSc format).  For good matrix assembly performance
4282    the user should preallocate the matrix storage by setting the parameters
4283    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4284    performance can be increased by more than a factor of 50.
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  comm - MPI communicator
4290 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4291            This value should be the same as the local size used in creating the
4292            y vector for the matrix-vector product y = Ax.
4293 .  n - This value should be the same as the local size used in creating the
4294        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4295        calculated if N is given) For square matrices n is almost always m.
4296 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4297 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4298 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4299            (same value is used for all local rows)
4300 .  d_nnz - array containing the number of nonzeros in the various rows of the
4301            DIAGONAL portion of the local submatrix (possibly different for each row)
4302            or NULL, if d_nz is used to specify the nonzero structure.
4303            The size of this array is equal to the number of local rows, i.e 'm'.
4304 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4305            submatrix (same value is used for all local rows).
4306 -  o_nnz - array containing the number of nonzeros in the various rows of the
4307            OFF-DIAGONAL portion of the local submatrix (possibly different for
4308            each row) or NULL, if o_nz is used to specify the nonzero
4309            structure. The size of this array is equal to the number
4310            of local rows, i.e 'm'.
4311 
4312    Output Parameter:
4313 .  A - the matrix
4314 
4315    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4316    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4317    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4318 
4319    Notes:
4320    If the *_nnz parameter is given then the *_nz parameter is ignored
4321 
4322    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4323    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4324    storage requirements for this matrix.
4325 
4326    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4327    processor then it must be used on all processors that share the object for
4328    that argument.
4329 
4330    The user MUST specify either the local or global matrix dimensions
4331    (possibly both).
4332 
4333    The parallel matrix is partitioned across processors such that the
4334    first m0 rows belong to process 0, the next m1 rows belong to
4335    process 1, the next m2 rows belong to process 2, etc., where
4336    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4337    values corresponding to an [m x N] submatrix.
4338 
4339    The columns are logically partitioned with the n0 columns belonging
4340    to 0th partition, the next n1 columns belonging to the next
4341    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4342 
4343    The DIAGONAL portion of the local submatrix on any given processor
4344    is the submatrix corresponding to the rows and columns m,n
4345    corresponding to the given processor, i.e. the diagonal matrix on
4346    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4347    etc. The remaining portion of the local submatrix [m x (N-n)]
4348    constitutes the OFF-DIAGONAL portion. The example below better
4349    illustrates this concept.
4350 
4351    For a square global matrix we define each processor's diagonal portion
4352    to be its local rows and the corresponding columns (a square submatrix);
4353    each processor's off-diagonal portion encompasses the remainder of the
4354    local matrix (a rectangular submatrix).
4355 
4356    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4357 
4358    When calling this routine with a single process communicator, a matrix of
4359    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4360    type of communicator, use the construction mechanism
4361 .vb
4362      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4363 .ve
4364 
4365 $     MatCreate(...,&A);
4366 $     MatSetType(A,MATMPIAIJ);
4367 $     MatSetSizes(A, m,n,M,N);
4368 $     MatMPIAIJSetPreallocation(A,...);
4369 
4370    By default, this format uses inodes (identical nodes) when possible.
4371    We search for consecutive rows with the same nonzero structure, thereby
4372    reusing matrix information to achieve increased efficiency.
4373 
4374    Options Database Keys:
4375 +  -mat_no_inode  - Do not use inodes
4376 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4377 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4378         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4379         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4380 
4381    Example usage:
4382 
4383    Consider the following 8x8 matrix with 34 non-zero values, that is
4384    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4385    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4386    as follows
4387 
4388 .vb
4389             1  2  0  |  0  3  0  |  0  4
4390     Proc0   0  5  6  |  7  0  0  |  8  0
4391             9  0 10  | 11  0  0  | 12  0
4392     -------------------------------------
4393            13  0 14  | 15 16 17  |  0  0
4394     Proc1   0 18  0  | 19 20 21  |  0  0
4395             0  0  0  | 22 23  0  | 24  0
4396     -------------------------------------
4397     Proc2  25 26 27  |  0  0 28  | 29  0
4398            30  0  0  | 31 32 33  |  0 34
4399 .ve
4400 
4401    This can be represented as a collection of submatrices as
4402 
4403 .vb
4404       A B C
4405       D E F
4406       G H I
4407 .ve
4408 
4409    Where the submatrices A,B,C are owned by proc0, D,E,F are
4410    owned by proc1, G,H,I are owned by proc2.
4411 
4412    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4413    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4414    The 'M','N' parameters are 8,8, and have the same values on all procs.
4415 
4416    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4417    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4418    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4419    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4420    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4421    matrix, and [DF] as another SeqAIJ matrix.
4422 
4423    When d_nz, o_nz parameters are specified, d_nz storage elements are
4424    allocated for every row of the local diagonal submatrix, and o_nz
4425    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4426    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4427    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4428    In this case, the values of d_nz,o_nz are
4429 .vb
4430      proc0 : dnz = 2, o_nz = 2
4431      proc1 : dnz = 3, o_nz = 2
4432      proc2 : dnz = 1, o_nz = 4
4433 .ve
4434    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4435    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4436    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4437    34 values.
4438 
4439    When d_nnz, o_nnz parameters are specified, the storage is specified
4440    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4441    In the above case the values for d_nnz,o_nnz are
4442 .vb
4443      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4444      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4445      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4446 .ve
4447    Here the space allocated is the sum of all the above values, i.e. 34, and
4448    hence pre-allocation is perfect.
4449 
4450    Level: intermediate
4451 
4452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4453           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4454 @*/
4455 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4456 {
4457   PetscErrorCode ierr;
4458   PetscMPIInt    size;
4459 
4460   PetscFunctionBegin;
4461   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4462   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4463   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4464   if (size > 1) {
4465     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4466     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4467   } else {
4468     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4469     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4470   }
4471   PetscFunctionReturn(0);
4472 }
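/*
   A minimal sketch (illustrative only) of the one-call creation path MatCreateAIJ();
   the sizes and per-row nonzero estimates below are hypothetical placeholders.

     Mat            A;
     PetscInt       m = 50,rstart,rend,row;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   On a single-process communicator this returns a SEQAIJ matrix, as described above.
*/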
4473 
4474 /*@C
4475   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4476 
4477   Not collective
4478 
4479   Input Parameter:
4480 . A - The MPIAIJ matrix
4481 
4482   Output Parameters:
4483 + Ad - The local diagonal block as a SeqAIJ matrix
4484 . Ao - The local off-diagonal block as a SeqAIJ matrix
4485 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4486 
4487   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4488   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4489   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4490   local column numbers to global column numbers in the original matrix.
4491 
4492   Level: intermediate
4493 
4494 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4495 @*/
4496 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4497 {
4498   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4499   PetscBool      flg;
4500   PetscErrorCode ierr;
4501 
4502   PetscFunctionBegin;
4503   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4504   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4505   if (Ad)     *Ad     = a->A;
4506   if (Ao)     *Ao     = a->B;
4507   if (colmap) *colmap = a->garray;
4508   PetscFunctionReturn(0);
4509 }
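/*
   A minimal sketch (illustrative only) of inspecting the local blocks returned by
   MatMPIAIJGetSeqAIJ(); A is assumed to be an already assembled MATMPIAIJ matrix.

     Mat            Ad,Ao;
     const PetscInt *garray;
     PetscInt       nAo;
     PetscErrorCode ierr;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
     ierr = MatGetSize(Ao,NULL,&nAo);CHKERRQ(ierr);
     garray[k] is the global column number of local column k of Ao, for k in [0,nAo)
     note: Ad, Ao, and garray are borrowed references and must not be destroyed or freed by the caller
*/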
4510 
4511 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4512 {
4513   PetscErrorCode ierr;
4514   PetscInt       m,N,i,rstart,nnz,Ii;
4515   PetscInt       *indx;
4516   PetscScalar    *values;
4517   MatType        rootType;
4518 
4519   PetscFunctionBegin;
4520   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4521   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4522     PetscInt       *dnz,*onz,sum,bs,cbs;
4523 
4524     if (n == PETSC_DECIDE) {
4525       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4526     }
4527     /* Check sum(n) = N */
4528     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4529     PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
4530 
4531     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4532     rstart -= m;
4533 
4534     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4535     for (i=0; i<m; i++) {
4536       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4537       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4538       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4539     }
4540 
4541     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4542     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4543     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4544     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4545     ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
4546     ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
4547     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4548     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4549     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4550     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4551   }
4552 
4553   /* numeric phase */
4554   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4555   for (i=0; i<m; i++) {
4556     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4557     Ii   = i + rstart;
4558     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4559     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4560   }
4561   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4562   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4563   PetscFunctionReturn(0);
4564 }
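/*
   Illustrative sketch of how the routine above is typically reached: each process builds
   its own sequential matrix and the pieces are stacked row-wise into one parallel matrix.
   The public wrapper name MatCreateMPIMatConcatenateSeqMat() is assumed here; the sizes
   and values are hypothetical placeholders.

     Mat            seqA,mpiA;
     PetscErrorCode ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,4,2,NULL,&seqA);CHKERRQ(ierr);
     ... fill and assemble seqA on every process ...
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
     ierr = MatDestroy(&seqA);CHKERRQ(ierr);
     ierr = MatDestroy(&mpiA);CHKERRQ(ierr);
*/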
4565 
4566 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4567 {
4568   PetscErrorCode    ierr;
4569   PetscMPIInt       rank;
4570   PetscInt          m,N,i,rstart,nnz;
4571   size_t            len;
4572   const PetscInt    *indx;
4573   PetscViewer       out;
4574   char              *name;
4575   Mat               B;
4576   const PetscScalar *values;
4577 
4578   PetscFunctionBegin;
4579   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4580   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4581   /* Should this be the type of the diagonal block of A? */
4582   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4583   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4584   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4585   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4586   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4587   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4588   for (i=0; i<m; i++) {
4589     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4590     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4591     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4592   }
4593   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595 
4596   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4597   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4598   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4599   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4600   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4601   ierr = PetscFree(name);CHKERRQ(ierr);
4602   ierr = MatView(B,out);CHKERRQ(ierr);
4603   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4604   ierr = MatDestroy(&B);CHKERRQ(ierr);
4605   PetscFunctionReturn(0);
4606 }
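/*
   A minimal sketch (illustrative only) of MatFileSplit(): each rank appends its local rows,
   as a sequential matrix, to a binary file named <outfile>.<rank>. The file name below is a
   hypothetical placeholder.

     char           fname[] = "Asplit";
     PetscErrorCode ierr;

     ierr = MatFileSplit(A,fname);CHKERRQ(ierr);
*/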
4607 
4608 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4609 {
4610   PetscErrorCode      ierr;
4611   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4612 
4613   PetscFunctionBegin;
4614   if (!merge) PetscFunctionReturn(0);
4615   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4616   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4617   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4618   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4619   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4620   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4621   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4622   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4623   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4624   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4625   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4626   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4627   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4628   ierr = PetscFree(merge);CHKERRQ(ierr);
4629   PetscFunctionReturn(0);
4630 }
4631 
4632 #include <../src/mat/utils/freespace.h>
4633 #include <petscbt.h>
4634 
4635 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4636 {
4637   PetscErrorCode      ierr;
4638   MPI_Comm            comm;
4639   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4640   PetscMPIInt         size,rank,taga,*len_s;
4641   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4642   PetscInt            proc,m;
4643   PetscInt            **buf_ri,**buf_rj;
4644   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4645   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4646   MPI_Request         *s_waits,*r_waits;
4647   MPI_Status          *status;
4648   const MatScalar     *aa,*a_a;
4649   MatScalar           **abuf_r,*ba_i;
4650   Mat_Merge_SeqsToMPI *merge;
4651   PetscContainer      container;
4652 
4653   PetscFunctionBegin;
4654   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4655   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4656 
4657   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4658   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4659 
4660   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4661   PetscCheckFalse(!container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4662   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4663   ierr = MatSeqAIJGetArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4664   aa   = a_a;
4665 
4666   bi     = merge->bi;
4667   bj     = merge->bj;
4668   buf_ri = merge->buf_ri;
4669   buf_rj = merge->buf_rj;
4670 
4671   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4672   owners = merge->rowmap->range;
4673   len_s  = merge->len_s;
4674 
4675   /* send and recv matrix values */
4676   /*-----------------------------*/
4677   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4678   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4679 
4680   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4681   for (proc=0,k=0; proc<size; proc++) {
4682     if (!len_s[proc]) continue;
4683     i    = owners[proc];
4684     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4685     k++;
4686   }
4687 
4688   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4689   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4690   ierr = PetscFree(status);CHKERRQ(ierr);
4691 
4692   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4693   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4694 
4695   /* insert mat values of mpimat */
4696   /*----------------------------*/
4697   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4698   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4699 
4700   for (k=0; k<merge->nrecv; k++) {
4701     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4702     nrows       = *(buf_ri_k[k]);
4703     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4704     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4705   }
4706 
4707   /* set values of ba */
4708   m    = merge->rowmap->n;
4709   for (i=0; i<m; i++) {
4710     arow = owners[rank] + i;
4711     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4712     bnzi = bi[i+1] - bi[i];
4713     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4714 
4715     /* add local non-zero vals of this proc's seqmat into ba */
4716     anzi   = ai[arow+1] - ai[arow];
4717     aj     = a->j + ai[arow];
4718     aa     = a_a + ai[arow];
4719     nextaj = 0;
4720     for (j=0; nextaj<anzi; j++) {
4721       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4722         ba_i[j] += aa[nextaj++];
4723       }
4724     }
4725 
4726     /* add received vals into ba */
4727     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4728       /* i-th row */
4729       if (i == *nextrow[k]) {
4730         anzi   = *(nextai[k]+1) - *nextai[k];
4731         aj     = buf_rj[k] + *(nextai[k]);
4732         aa     = abuf_r[k] + *(nextai[k]);
4733         nextaj = 0;
4734         for (j=0; nextaj<anzi; j++) {
4735           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4736             ba_i[j] += aa[nextaj++];
4737           }
4738         }
4739         nextrow[k]++; nextai[k]++;
4740       }
4741     }
4742     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4743   }
4744   ierr = MatSeqAIJRestoreArrayRead(seqmat,&a_a);CHKERRQ(ierr);
4745   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4746   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4747 
4748   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4749   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4750   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4751   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4752   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4753   PetscFunctionReturn(0);
4754 }
4755 
4756 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4757 {
4758   PetscErrorCode      ierr;
4759   Mat                 B_mpi;
4760   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4761   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4762   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4763   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4764   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4765   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4766   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4767   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4768   MPI_Status          *status;
4769   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4770   PetscBT             lnkbt;
4771   Mat_Merge_SeqsToMPI *merge;
4772   PetscContainer      container;
4773 
4774   PetscFunctionBegin;
4775   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4776 
4777   /* make sure it is a PETSc comm */
4778   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4779   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4780   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4781 
4782   ierr = PetscNew(&merge);CHKERRQ(ierr);
4783   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4784 
4785   /* determine row ownership */
4786   /*---------------------------------------------------------*/
4787   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4788   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4789   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4790   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4791   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4792   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4793   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4794 
4795   m      = merge->rowmap->n;
4796   owners = merge->rowmap->range;
4797 
4798   /* determine the number of messages to send, their lengths */
4799   /*---------------------------------------------------------*/
4800   len_s = merge->len_s;
4801 
4802   len          = 0; /* length of buf_si[] */
4803   merge->nsend = 0;
4804   for (proc=0; proc<size; proc++) {
4805     len_si[proc] = 0;
4806     if (proc == rank) {
4807       len_s[proc] = 0;
4808     } else {
4809       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4810       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4811     }
4812     if (len_s[proc]) {
4813       merge->nsend++;
4814       nrows = 0;
4815       for (i=owners[proc]; i<owners[proc+1]; i++) {
4816         if (ai[i+1] > ai[i]) nrows++;
4817       }
4818       len_si[proc] = 2*(nrows+1);
4819       len         += len_si[proc];
4820     }
4821   }
4822 
4823   /* determine the number and length of messages to receive for ij-structure */
4824   /*-------------------------------------------------------------------------*/
4825   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4826   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4827 
4828   /* post the Irecv of j-structure */
4829   /*-------------------------------*/
4830   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4831   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4832 
4833   /* post the Isend of j-structure */
4834   /*--------------------------------*/
4835   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4836 
4837   for (proc=0, k=0; proc<size; proc++) {
4838     if (!len_s[proc]) continue;
4839     i    = owners[proc];
4840     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4841     k++;
4842   }
4843 
4844   /* receives and sends of j-structure are complete */
4845   /*------------------------------------------------*/
4846   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4847   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4848 
4849   /* send and recv i-structure */
4850   /*---------------------------*/
4851   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4852   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4853 
4854   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4855   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4856   for (proc=0,k=0; proc<size; proc++) {
4857     if (!len_s[proc]) continue;
4858     /* form outgoing message for i-structure:
4859          buf_si[0]:                 nrows to be sent
4860                [1:nrows]:           row index (global)
4861                [nrows+1:2*nrows+1]: i-structure index
4862     */
4863     /*-------------------------------------------*/
4864     nrows       = len_si[proc]/2 - 1;
4865     buf_si_i    = buf_si + nrows+1;
4866     buf_si[0]   = nrows;
4867     buf_si_i[0] = 0;
4868     nrows       = 0;
4869     for (i=owners[proc]; i<owners[proc+1]; i++) {
4870       anzi = ai[i+1] - ai[i];
4871       if (anzi) {
4872         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4873         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4874         nrows++;
4875       }
4876     }
4877     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4878     k++;
4879     buf_si += len_si[proc];
4880   }
4881 
4882   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4883   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4884 
4885   ierr = PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4886   for (i=0; i<merge->nrecv; i++) {
4887     ierr = PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4888   }
4889 
4890   ierr = PetscFree(len_si);CHKERRQ(ierr);
4891   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4892   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4893   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4894   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4895   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4896   ierr = PetscFree(status);CHKERRQ(ierr);
4897 
4898   /* compute a local seq matrix in each processor */
4899   /*----------------------------------------------*/
4900   /* allocate bi array and free space for accumulating nonzero column info */
4901   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4902   bi[0] = 0;
4903 
4904   /* create and initialize a linked list */
4905   nlnk = N+1;
4906   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4907 
4908   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4909   len  = ai[owners[rank+1]] - ai[owners[rank]];
4910   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4911 
4912   current_space = free_space;
4913 
4914   /* determine symbolic info for each local row */
4915   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4916 
4917   for (k=0; k<merge->nrecv; k++) {
4918     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4919     nrows       = *buf_ri_k[k];
4920     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4921     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4922   }
4923 
4924   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4925   len  = 0;
4926   for (i=0; i<m; i++) {
4927     bnzi = 0;
4928     /* add local non-zero cols of this proc's seqmat into lnk */
4929     arow  = owners[rank] + i;
4930     anzi  = ai[arow+1] - ai[arow];
4931     aj    = a->j + ai[arow];
4932     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4933     bnzi += nlnk;
4934     /* add received col data into lnk */
4935     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4936       if (i == *nextrow[k]) { /* i-th row */
4937         anzi  = *(nextai[k]+1) - *nextai[k];
4938         aj    = buf_rj[k] + *nextai[k];
4939         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4940         bnzi += nlnk;
4941         nextrow[k]++; nextai[k]++;
4942       }
4943     }
4944     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4945 
4946     /* if free space is not available, make more free space */
4947     if (current_space->local_remaining<bnzi) {
4948       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4949       nspacedouble++;
4950     }
4951     /* copy data into free space, then initialize lnk */
4952     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4953     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4954 
4955     current_space->array           += bnzi;
4956     current_space->local_used      += bnzi;
4957     current_space->local_remaining -= bnzi;
4958 
4959     bi[i+1] = bi[i] + bnzi;
4960   }
4961 
4962   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4963 
4964   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4965   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4966   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4967 
4968   /* create symbolic parallel matrix B_mpi */
4969   /*---------------------------------------*/
4970   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4971   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4972   if (n==PETSC_DECIDE) {
4973     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4974   } else {
4975     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4976   }
4977   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4978   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4979   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4980   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4981   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4982 
4983   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4984   B_mpi->assembled  = PETSC_FALSE;
4985   merge->bi         = bi;
4986   merge->bj         = bj;
4987   merge->buf_ri     = buf_ri;
4988   merge->buf_rj     = buf_rj;
4989   merge->coi        = NULL;
4990   merge->coj        = NULL;
4991   merge->owners_co  = NULL;
4992 
4993   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4994 
4995   /* attach the supporting struct to B_mpi for reuse */
4996   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4997   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4998   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4999   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5000   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5001   *mpimat = B_mpi;
5002 
5003   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5004   PetscFunctionReturn(0);
5005 }
5006 
5007 /*@C
5008       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5009                  matrices from each processor
5010 
5011     Collective
5012 
5013    Input Parameters:
5014 +    comm - the communicator the parallel matrix will live on
5015 .    seqmat - the input sequential matrix
5016 .    m - number of local rows (or PETSC_DECIDE)
5017 .    n - number of local columns (or PETSC_DECIDE)
5018 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5019 
5020    Output Parameter:
5021 .    mpimat - the parallel matrix generated
5022 
5023     Level: advanced
5024 
5025    Notes:
5026      The dimensions of the sequential matrix in each processor MUST be the same.
5027      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5028      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
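
   Example Usage:
     A minimal sketch (A_seq and A_mpi are placeholder names; each rank is assumed to hold a fully assembled SeqAIJ matrix A_seq of identical dimensions),
.vb
     Mat A_mpi;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);CHKERRQ(ierr);
     // ... change the numerical values of A_seq, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A_mpi);CHKERRQ(ierr);
.ve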
5029 @*/
5030 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5031 {
5032   PetscErrorCode ierr;
5033   PetscMPIInt    size;
5034 
5035   PetscFunctionBegin;
5036   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5037   if (size == 1) {
5038     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5039     if (scall == MAT_INITIAL_MATRIX) {
5040       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5041     } else {
5042       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5043     }
5044     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5045     PetscFunctionReturn(0);
5046   }
5047   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5048   if (scall == MAT_INITIAL_MATRIX) {
5049     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5050   }
5051   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5052   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5053   PetscFunctionReturn(0);
5054 }
5055 
5056 /*@
5057      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5058           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5059           with MatGetSize().
5060 
5061     Not Collective
5062 
5063    Input Parameters:
5064 +    A - the matrix
5065 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 
5067    Output Parameter:
5068 .    A_loc - the local sequential matrix generated
5069 
5070     Level: developer
5071 
5072    Notes:
5073      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5074      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5075      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5076      modify the values of the returned A_loc.
5077 
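   Example Usage:
     A minimal sketch (A and A_loc are placeholder names) showing an initial call followed by a reuse call after the values of A have changed,
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     // ... change the numerical values of A, keeping its nonzero pattern ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
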
5078 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5079 @*/
5080 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5081 {
5082   PetscErrorCode    ierr;
5083   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5084   Mat_SeqAIJ        *mat,*a,*b;
5085   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5086   const PetscScalar *aa,*ba,*aav,*bav;
5087   PetscScalar       *ca,*cam;
5088   PetscMPIInt       size;
5089   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5090   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5091   PetscBool         match;
5092 
5093   PetscFunctionBegin;
5094   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5095   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5096   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5097   if (size == 1) {
5098     if (scall == MAT_INITIAL_MATRIX) {
5099       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5100       *A_loc = mpimat->A;
5101     } else if (scall == MAT_REUSE_MATRIX) {
5102       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5103     }
5104     PetscFunctionReturn(0);
5105   }
5106 
5107   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5108   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5109   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5110   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5111   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5112   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5113   aa   = aav;
5114   ba   = bav;
5115   if (scall == MAT_INITIAL_MATRIX) {
5116     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5117     ci[0] = 0;
5118     for (i=0; i<am; i++) {
5119       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5120     }
5121     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5122     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5123     k    = 0;
5124     for (i=0; i<am; i++) {
5125       ncols_o = bi[i+1] - bi[i];
5126       ncols_d = ai[i+1] - ai[i];
5127       /* off-diagonal portion of A */
5128       for (jo=0; jo<ncols_o; jo++) {
5129         col = cmap[*bj];
5130         if (col >= cstart) break;
5131         cj[k]   = col; bj++;
5132         ca[k++] = *ba++;
5133       }
5134       /* diagonal portion of A */
5135       for (j=0; j<ncols_d; j++) {
5136         cj[k]   = cstart + *aj++;
5137         ca[k++] = *aa++;
5138       }
5139       /* off-diagonal portion of A */
5140       for (j=jo; j<ncols_o; j++) {
5141         cj[k]   = cmap[*bj++];
5142         ca[k++] = *ba++;
5143       }
5144     }
5145     /* put together the new matrix */
5146     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5147     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5148     /* Since these are PETSc arrays, change flags to free them as necessary. */
5149     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5150     mat->free_a  = PETSC_TRUE;
5151     mat->free_ij = PETSC_TRUE;
5152     mat->nonew   = 0;
5153   } else if (scall == MAT_REUSE_MATRIX) {
5154     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
5155     ci   = mat->i;
5156     cj   = mat->j;
5157     ierr = MatSeqAIJGetArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5158     for (i=0; i<am; i++) {
5159       /* off-diagonal portion of A */
5160       ncols_o = bi[i+1] - bi[i];
5161       for (jo=0; jo<ncols_o; jo++) {
5162         col = cmap[*bj];
5163         if (col >= cstart) break;
5164         *cam++ = *ba++; bj++;
5165       }
5166       /* diagonal portion of A */
5167       ncols_d = ai[i+1] - ai[i];
5168       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5169       /* off-diagonal portion of A */
5170       for (j=jo; j<ncols_o; j++) {
5171         *cam++ = *ba++; bj++;
5172       }
5173     }
5174     ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&cam);CHKERRQ(ierr);
5175   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5176   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5177   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5178   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5179   PetscFunctionReturn(0);
5180 }
5181 
5182 /*@
5183      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5184           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts.
5185 
5186     Not Collective
5187 
5188    Input Parameters:
5189 +    A - the matrix
5190 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5191 
5192    Output Parameters:
5193 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5194 -    A_loc - the local sequential matrix generated
5195 
5196     Level: developer
5197 
5198    Notes:
5199      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering).
5200 
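   Example Usage:
     A minimal sketch (A, A_loc and glob are placeholder names); the returned IS maps the columns of A_loc back to global column indices of A,
.vb
     Mat A_loc;
     IS  glob;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
     // ... use A_loc and glob ...
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
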
5201 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5202 
5203 @*/
5204 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5205 {
5206   PetscErrorCode ierr;
5207   Mat            Ao,Ad;
5208   const PetscInt *cmap;
5209   PetscMPIInt    size;
5210   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5211 
5212   PetscFunctionBegin;
5213   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5214   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5215   if (size == 1) {
5216     if (scall == MAT_INITIAL_MATRIX) {
5217       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5218       *A_loc = Ad;
5219     } else if (scall == MAT_REUSE_MATRIX) {
5220       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5221     }
5222     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5223     PetscFunctionReturn(0);
5224   }
5225   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5226   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5227   if (f) {
5228     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5229   } else {
5230     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5231     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5232     Mat_SeqAIJ        *c;
5233     PetscInt          *ai = a->i, *aj = a->j;
5234     PetscInt          *bi = b->i, *bj = b->j;
5235     PetscInt          *ci,*cj;
5236     const PetscScalar *aa,*ba;
5237     PetscScalar       *ca;
5238     PetscInt          i,j,am,dn,on;
5239 
5240     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5241     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5242     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5243     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5244     if (scall == MAT_INITIAL_MATRIX) {
5245       PetscInt k;
5246       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5247       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5248       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5249       ci[0] = 0;
5250       for (i=0,k=0; i<am; i++) {
5251         const PetscInt ncols_o = bi[i+1] - bi[i];
5252         const PetscInt ncols_d = ai[i+1] - ai[i];
5253         ci[i+1] = ci[i] + ncols_o + ncols_d;
5254         /* diagonal portion of A */
5255         for (j=0; j<ncols_d; j++,k++) {
5256           cj[k] = *aj++;
5257           ca[k] = *aa++;
5258         }
5259         /* off-diagonal portion of A */
5260         for (j=0; j<ncols_o; j++,k++) {
5261           cj[k] = dn + *bj++;
5262           ca[k] = *ba++;
5263         }
5264       }
5265       /* put together the new matrix */
5266       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5267       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5268       /* Since these are PETSc arrays, change flags to free them as necessary. */
5269       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5270       c->free_a  = PETSC_TRUE;
5271       c->free_ij = PETSC_TRUE;
5272       c->nonew   = 0;
5273       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5274     } else if (scall == MAT_REUSE_MATRIX) {
5275       ierr = MatSeqAIJGetArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5276       for (i=0; i<am; i++) {
5277         const PetscInt ncols_d = ai[i+1] - ai[i];
5278         const PetscInt ncols_o = bi[i+1] - bi[i];
5279         /* diagonal portion of A */
5280         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5281         /* off-diagonal portion of A */
5282         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5283       }
5284       ierr = MatSeqAIJRestoreArrayWrite(*A_loc,&ca);CHKERRQ(ierr);
5285     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5286     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5287     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5288     if (glob) {
5289       PetscInt cst, *gidx;
5290 
5291       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5292       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5293       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5294       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5295       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5296     }
5297   }
5298   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5299   PetscFunctionReturn(0);
5300 }
5301 
5302 /*@C
5303      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5304 
5305     Not Collective
5306 
5307    Input Parameters:
5308 +    A - the matrix
5309 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5310 -    row, col - index sets of rows and columns to extract (or NULL)
5311 
5312    Output Parameter:
5313 .    A_loc - the local sequential matrix generated
5314 
5315     Level: developer
5316 
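   Example Usage:
     A minimal sketch (A and A_loc are placeholder names) that extracts all local rows and only the nonzero columns, letting the routine build the row and column index sets,
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     // ... use A_loc ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
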
5317 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5318 
5319 @*/
5320 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5321 {
5322   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5323   PetscErrorCode ierr;
5324   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5325   IS             isrowa,iscola;
5326   Mat            *aloc;
5327   PetscBool      match;
5328 
5329   PetscFunctionBegin;
5330   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5331   PetscCheckFalse(!match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5332   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5333   if (!row) {
5334     start = A->rmap->rstart; end = A->rmap->rend;
5335     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5336   } else {
5337     isrowa = *row;
5338   }
5339   if (!col) {
5340     start = A->cmap->rstart;
5341     cmap  = a->garray;
5342     nzA   = a->A->cmap->n;
5343     nzB   = a->B->cmap->n;
5344     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5345     ncols = 0;
5346     for (i=0; i<nzB; i++) {
5347       if (cmap[i] < start) idx[ncols++] = cmap[i];
5348       else break;
5349     }
5350     imark = i;
5351     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5352     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5353     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5354   } else {
5355     iscola = *col;
5356   }
5357   if (scall != MAT_INITIAL_MATRIX) {
5358     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5359     aloc[0] = *A_loc;
5360   }
5361   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5362   if (!col) { /* attach global id of condensed columns */
5363     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5364   }
5365   *A_loc = aloc[0];
5366   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5367   if (!row) {
5368     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5369   }
5370   if (!col) {
5371     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5372   }
5373   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5374   PetscFunctionReturn(0);
5375 }
5376 
5377 /*
5378  * Create a sequential AIJ matrix based on row indices; a whole row is extracted once its index is matched.
5379  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5380  * on a global size.
5381  * */
5382 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5383 {
5384   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5385   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5386   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5387   PetscMPIInt              owner;
5388   PetscSFNode              *iremote,*oiremote;
5389   const PetscInt           *lrowindices;
5390   PetscErrorCode           ierr;
5391   PetscSF                  sf,osf;
5392   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5393   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5394   MPI_Comm                 comm;
5395   ISLocalToGlobalMapping   mapping;
5396   const PetscScalar        *pd_a,*po_a;
5397 
5398   PetscFunctionBegin;
5399   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5400   /* plocalsize is the number of roots
5401    * nrows is the number of leaves
5402    * */
5403   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5404   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5405   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5406   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5407   for (i=0;i<nrows;i++) {
5408     /* Find a remote index and an owner for a row
5409      * The row could be local or remote
5410      * */
5411     owner = 0;
5412     lidx  = 0;
5413     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5414     iremote[i].index = lidx;
5415     iremote[i].rank  = owner;
5416   }
5417   /* Create SF to communicate how many nonzero columns for each row */
5418   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5419   /* SF will figure out the number of nonzero columns for each row, and their
5420    * offsets
5421    * */
5422   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5423   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5424   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5425 
5426   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5427   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5428   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5429   roffsets[0] = 0;
5430   roffsets[1] = 0;
5431   for (i=0;i<plocalsize;i++) {
5432     /* diag */
5433     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5434     /* off diag */
5435     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5436     /* compute offsets so that we know the relative location of each row */
5437     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5438     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5439   }
5440   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5441   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5442   /* 'r' means root, and 'l' means leaf */
5443   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5444   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5445   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5446   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5447   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5448   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5449   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5450   dntotalcols = 0;
5451   ontotalcols = 0;
5452   ncol = 0;
5453   for (i=0;i<nrows;i++) {
5454     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5455     ncol = PetscMax(pnnz[i],ncol);
5456     /* diag */
5457     dntotalcols += nlcols[i*2+0];
5458     /* off diag */
5459     ontotalcols += nlcols[i*2+1];
5460   }
5461   /* We do not need to figure out the exact number of columns
5462    * since all the calculations will be done by going through the raw data
5463    * */
5464   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5465   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5466   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5467   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5468   /* diag */
5469   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5470   /* off diag */
5471   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5472   /* diag */
5473   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5474   /* off diag */
5475   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5476   dntotalcols = 0;
5477   ontotalcols = 0;
5478   ntotalcols  = 0;
5479   for (i=0;i<nrows;i++) {
5480     owner = 0;
5481     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5482     /* Set iremote for diag matrix */
5483     for (j=0;j<nlcols[i*2+0];j++) {
5484       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5485       iremote[dntotalcols].rank    = owner;
5486       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5487       ilocal[dntotalcols++]        = ntotalcols++;
5488     }
5489     /* off diag */
5490     for (j=0;j<nlcols[i*2+1];j++) {
5491       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5492       oiremote[ontotalcols].rank    = owner;
5493       oilocal[ontotalcols++]        = ntotalcols++;
5494     }
5495   }
5496   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5497   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5498   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5499   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5500   /* P serves as roots and P_oth as leaves
5501    * Diag matrix
5502    * */
5503   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5504   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5505   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5506 
5507   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5508   /* Off diag */
5509   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5510   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5511   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5512   ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5513   ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5514   /* We operate on the matrix internal data for saving memory */
5515   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5516   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5517   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5518   /* Convert to global indices for diag matrix */
5519   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5520   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5521   /* We want P_oth to store global indices */
5522   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5523   /* Use memory scalable approach */
5524   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5525   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5526   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5527   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5528   /* Convert back to local indices */
5529   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5530   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5531   nout = 0;
5532   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5533   PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
5534   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5535   /* Exchange values */
5536   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5537   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5538   ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5539   ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5540   /* Stop PETSc from shrinking memory */
5541   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5542   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5543   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5544   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5545   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5546   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5547   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5548   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5549   PetscFunctionReturn(0);
5550 }
5551 
5552 /*
5553  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5554  * This supports MPIAIJ and MAIJ
5555  * */
5556 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5557 {
5558   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5559   Mat_SeqAIJ            *p_oth;
5560   IS                    rows,map;
5561   PetscHMapI            hamp;
5562   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5563   MPI_Comm              comm;
5564   PetscSF               sf,osf;
5565   PetscBool             has;
5566   PetscErrorCode        ierr;
5567 
5568   PetscFunctionBegin;
5569   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5570   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5571   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5572    *  and then create a submatrix (that often is an overlapping matrix)
5573    * */
5574   if (reuse == MAT_INITIAL_MATRIX) {
5575     /* Use a hash table to figure out unique keys */
5576     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5577     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5578     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5579     count = 0;
5580     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5581     for (i=0;i<a->B->cmap->n;i++) {
5582       key  = a->garray[i]/dof;
5583       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5584       if (!has) {
5585         mapping[i] = count;
5586         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5587       } else {
5588         /* Current 'i' has the same value the previous step */
5589         /* Current 'i' has the same value as in the previous step */
5590       }
5591     }
5592     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5593     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5594     PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
5595     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5596     off = 0;
5597     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5598     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5599     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5600     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5601     /* In case the matrix was already created and the user wants to recreate it */
5602     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5603     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5604     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5605     ierr = ISDestroy(&map);CHKERRQ(ierr);
5606     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5607   } else if (reuse == MAT_REUSE_MATRIX) {
5608     /* If the matrix was already created, we simply update values using the SF objects
5609      * that were attached to the matrix earlier.
5610      */
5611     const PetscScalar *pd_a,*po_a;
5612 
5613     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5614     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5615     PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5616     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5617     /* Update values in place */
5618     ierr = MatSeqAIJGetArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5619     ierr = MatSeqAIJGetArrayRead(p->B,&po_a);CHKERRQ(ierr);
5620     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5621     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5622     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5623     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5624     ierr = MatSeqAIJRestoreArrayRead(p->A,&pd_a);CHKERRQ(ierr);
5625     ierr = MatSeqAIJRestoreArrayRead(p->B,&po_a);CHKERRQ(ierr);
5626   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5627   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5628   PetscFunctionReturn(0);
5629 }
5630 
5631 /*@C
5632   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5633 
5634   Collective on Mat
5635 
5636   Input Parameters:
5637 + A - the first matrix in mpiaij format
5638 . B - the second matrix in mpiaij format
5639 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5640 
5641   Output Parameters:
5642 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5643 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5644 - B_seq - the sequential matrix generated
5645 
5646   Level: developer
5647 
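  Example Usage:
    A minimal sketch (A, B, B_seq, rowb and colb are placeholder names); the index sets created by the first call are reused by the second,
.vb
    IS  rowb = NULL,colb = NULL;
    Mat B_seq;
    ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
    // ... change the numerical values of B, keeping its nonzero pattern ...
    ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
    ierr = ISDestroy(&rowb);CHKERRQ(ierr);
    ierr = ISDestroy(&colb);CHKERRQ(ierr);
    ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve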
5648 @*/
5649 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5650 {
5651   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5652   PetscErrorCode ierr;
5653   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5654   IS             isrowb,iscolb;
5655   Mat            *bseq=NULL;
5656 
5657   PetscFunctionBegin;
5658   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5659     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5660   }
5661   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5662 
5663   if (scall == MAT_INITIAL_MATRIX) {
5664     start = A->cmap->rstart;
5665     cmap  = a->garray;
5666     nzA   = a->A->cmap->n;
5667     nzB   = a->B->cmap->n;
5668     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5669     ncols = 0;
5670     for (i=0; i<nzB; i++) {  /* row < local row index */
5671       if (cmap[i] < start) idx[ncols++] = cmap[i];
5672       else break;
5673     }
5674     imark = i;
5675     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5676     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5677     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5678     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5679   } else {
5680     PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5681     isrowb  = *rowb; iscolb = *colb;
5682     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5683     bseq[0] = *B_seq;
5684   }
5685   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5686   *B_seq = bseq[0];
5687   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5688   if (!rowb) {
5689     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5690   } else {
5691     *rowb = isrowb;
5692   }
5693   if (!colb) {
5694     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5695   } else {
5696     *colb = iscolb;
5697   }
5698   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5699   PetscFunctionReturn(0);
5700 }
5701 
5702 /*
5703     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5704     of the OFF-DIAGONAL portion of local A
5705 
5706     Collective on Mat
5707 
5708    Input Parameters:
5709 +    A,B - the matrices in mpiaij format
5710 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5711 
5712    Output Parameters:
5713 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5714 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5715 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5716 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5717 
5718     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5719      for this matrix. This is not desirable.
5720 
5721     Level: developer
5722 
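    Example Usage:
      A sketch only (A and B are MPIAIJ matrices; startsj_s, startsj_r and bufa are placeholder names that the caller keeps between calls so the second call can reuse the communication pattern):

        PetscInt  *startsj_s = NULL,*startsj_r = NULL;
        MatScalar *bufa = NULL;
        Mat       B_oth;
        ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
        ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
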
5723 */
5724 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5725 {
5726   PetscErrorCode         ierr;
5727   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5728   Mat_SeqAIJ             *b_oth;
5729   VecScatter             ctx;
5730   MPI_Comm               comm;
5731   const PetscMPIInt      *rprocs,*sprocs;
5732   const PetscInt         *srow,*rstarts,*sstarts;
5733   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5734   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5735   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5736   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5737   PetscMPIInt            size,tag,rank,nreqs;
5738 
5739   PetscFunctionBegin;
5740   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5741   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5742 
5743   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5744     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5745   }
5746   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5747   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5748 
5749   if (size == 1) {
5750     startsj_s = NULL;
5751     bufa_ptr  = NULL;
5752     *B_oth    = NULL;
5753     PetscFunctionReturn(0);
5754   }
5755 
5756   ctx = a->Mvctx;
5757   tag = ((PetscObject)ctx)->tag;
5758 
5759   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5760   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5761   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5762   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5763   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5764   rwaits = reqs;
5765   swaits = reqs + nrecvs;
5766 
5767   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5768   if (scall == MAT_INITIAL_MATRIX) {
5769     /* i-array */
5770     /*---------*/
5771     /*  post receives */
5772     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5773     for (i=0; i<nrecvs; i++) {
5774       rowlen = rvalues + rstarts[i]*rbs;
5775       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5776       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5777     }
5778 
5779     /* pack the outgoing message */
5780     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5781 
5782     sstartsj[0] = 0;
5783     rstartsj[0] = 0;
5784     len         = 0; /* total length of j or a array to be sent */
5785     if (nsends) {
5786       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5787       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5788     }
5789     for (i=0; i<nsends; i++) {
5790       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5791       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5792       for (j=0; j<nrows; j++) {
5793         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5794         for (l=0; l<sbs; l++) {
5795           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5796 
5797           rowlen[j*sbs+l] = ncols;
5798 
5799           len += ncols;
5800           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5801         }
5802         k++;
5803       }
5804       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5805 
5806       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5807     }
5808     /* recvs and sends of i-array are completed */
5809     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5810     ierr = PetscFree(svalues);CHKERRQ(ierr);
5811 
5812     /* allocate buffers for sending j and a arrays */
5813     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5814     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5815 
5816     /* create i-array of B_oth */
5817     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5818 
5819     b_othi[0] = 0;
5820     len       = 0; /* total length of j or a array to be received */
5821     k         = 0;
5822     for (i=0; i<nrecvs; i++) {
5823       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5824       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5825       for (j=0; j<nrows; j++) {
5826         b_othi[k+1] = b_othi[k] + rowlen[j];
5827         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5828         k++;
5829       }
5830       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5831     }
5832     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5833 
5834     /* allocate space for the j and a arrays of B_oth */
5835     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5836     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5837 
5838     /* j-array */
5839     /*---------*/
5840     /*  post receives of j-array */
5841     for (i=0; i<nrecvs; i++) {
5842       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5843       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5844     }
5845 
5846     /* pack the outgoing message j-array */
5847     if (nsends) k = sstarts[0];
5848     for (i=0; i<nsends; i++) {
5849       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5850       bufJ  = bufj+sstartsj[i];
5851       for (j=0; j<nrows; j++) {
5852         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5853         for (ll=0; ll<sbs; ll++) {
5854           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5855           for (l=0; l<ncols; l++) {
5856             *bufJ++ = cols[l];
5857           }
5858           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5859         }
5860       }
5861       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5862     }
5863 
5864     /* recvs and sends of j-array are completed */
5865     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5866   } else if (scall == MAT_REUSE_MATRIX) {
5867     sstartsj = *startsj_s;
5868     rstartsj = *startsj_r;
5869     bufa     = *bufa_ptr;
5870     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5871     ierr     = MatSeqAIJGetArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5872   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5873 
5874   /* a-array */
5875   /*---------*/
5876   /*  post receives of a-array */
5877   for (i=0; i<nrecvs; i++) {
5878     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5879     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5880   }
5881 
5882   /* pack the outgoing message a-array */
5883   if (nsends) k = sstarts[0];
5884   for (i=0; i<nsends; i++) {
5885     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5886     bufA  = bufa+sstartsj[i];
5887     for (j=0; j<nrows; j++) {
5888       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5889       for (ll=0; ll<sbs; ll++) {
5890         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5891         for (l=0; l<ncols; l++) {
5892           *bufA++ = vals[l];
5893         }
5894         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5895       }
5896     }
5897     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5898   }
5899   /* recvs and sends of a-array are completed */
5900   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5901   ierr = PetscFree(reqs);CHKERRQ(ierr);
5902 
5903   if (scall == MAT_INITIAL_MATRIX) {
5904     /* put together the new matrix */
5905     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5906 
5907     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5908     /* Since these are PETSc arrays, change flags to free them as necessary. */
5909     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5910     b_oth->free_a  = PETSC_TRUE;
5911     b_oth->free_ij = PETSC_TRUE;
5912     b_oth->nonew   = 0;
5913 
5914     ierr = PetscFree(bufj);CHKERRQ(ierr);
5915     if (!startsj_s || !bufa_ptr) {
5916       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5917       ierr = PetscFree(bufa);CHKERRQ(ierr);
5918     } else {
5919       *startsj_s = sstartsj;
5920       *startsj_r = rstartsj;
5921       *bufa_ptr  = bufa;
5922     }
5923   } else if (scall == MAT_REUSE_MATRIX) {
5924     ierr = MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);CHKERRQ(ierr);
5925   }
5926 
5927   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5928   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5929   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5930   PetscFunctionReturn(0);
5931 }
5932 
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5936 #if defined(PETSC_HAVE_MKL_SPARSE)
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5938 #endif
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5941 #if defined(PETSC_HAVE_ELEMENTAL)
5942 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 #if defined(PETSC_HAVE_SCALAPACK)
5945 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5946 #endif
5947 #if defined(PETSC_HAVE_HYPRE)
5948 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 #if defined(PETSC_HAVE_CUDA)
5951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5952 #endif
5953 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5954 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5955 #endif
5956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5957 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5958 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5959 
5960 /*
5961     Computes (B'*A')' since computing A*B directly is untenable
5962 
5963                n                       p                          p
5964         [             ]       [             ]         [                 ]
5965       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5966         [             ]       [             ]         [                 ]
5967 
5968 */
5969 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5970 {
5971   PetscErrorCode ierr;
5972   Mat            At,Bt,Ct;
5973 
5974   PetscFunctionBegin;
5975   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5976   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5977   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5978   ierr = MatDestroy(&At);CHKERRQ(ierr);
5979   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5980   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5981   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5982   PetscFunctionReturn(0);
5983 }
5984 
5985 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5986 {
5987   PetscErrorCode ierr;
5988   PetscBool      cisdense;
5989 
5990   PetscFunctionBegin;
5991   PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
5992   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5993   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5994   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5995   if (!cisdense) {
5996     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5997   }
5998   ierr = MatSetUp(C);CHKERRQ(ierr);
5999 
6000   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6001   PetscFunctionReturn(0);
6002 }
6003 
6004 /* ----------------------------------------------------------------*/
6005 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6006 {
6007   Mat_Product *product = C->product;
6008   Mat         A = product->A,B=product->B;
6009 
6010   PetscFunctionBegin;
6011   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6012     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6013 
6014   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6015   C->ops->productsymbolic = MatProductSymbolic_AB;
6016   PetscFunctionReturn(0);
6017 }
6018 
6019 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6020 {
6021   PetscErrorCode ierr;
6022   Mat_Product    *product = C->product;
6023 
6024   PetscFunctionBegin;
6025   if (product->type == MATPRODUCT_AB) {
6026     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6027   }
6028   PetscFunctionReturn(0);
6029 }
6030 
6031 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
6032    is greater than value, or last if there is no such element.
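   A small worked example (hypothetical values): with array = {1,3,3,7}, first = 0 and last = 4,
   value = 3 gives *upper = 3 (the index of 7), while value = 9 gives *upper = 4 (== last).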
6033 */
6034 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
6035 {
6036   PetscCount  it,step,count = last - first;
6037 
6038   PetscFunctionBegin;
6039   while (count > 0) {
6040     it   = first;
6041     step = count / 2;
6042     it  += step;
6043     if (!(value < array[it])) {
6044       first  = ++it;
6045       count -= step + 1;
6046     } else count = step;
6047   }
6048   *upper = first;
6049   PetscFunctionReturn(0);
6050 }
6051 
6052 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix
6053 
6054   Input Parameters:
6055 
6056     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6057     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6058 
6059     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat
6060 
6061     For Set1, j1[] contains column indices of the nonzeros.
6062     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6063     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6064     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6065 
6066     Similar for Set2.
6067 
6068     This routine merges the two sets of nonzeros row by row and removes repeats.
6069 
6070   Output Parameters: (memories are allocated by the caller)
6071 
6072     i[],j[]: the CSR of the merged matrix, which has m rows.
6073     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6074     imap2[]: similar to imap1[], but for Set2.
6075     Note we order nonzeros row-by-row and from left to right.
6076     Note we order nonzeros row-by-row and from left to right. */
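/* A small worked example (hypothetical data), for a matrix with a single local row:
   Set1 has the sorted, possibly repeated column indices j1 = {2,2,5}, i.e. two unique nonzeros with jmap1 = {0,2,3};
   Set2 has j2 = {3,5,5}, i.e. two unique nonzeros with jmap2 = {0,1,3}.
   The merged row is j = {2,3,5} with i = {0,3}, and the mappings are imap1 = {0,2} and imap2 = {1,2}. */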
6077 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6078   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6079   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6080 {
6081   PetscErrorCode ierr;
6082   PetscInt       r,m; /* Row index of mat */
6083   PetscCount     t,t1,t2,b1,e1,b2,e2;
6084 
6085   PetscFunctionBegin;
6086   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
6087   t1   = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6088   i[0] = 0;
6089   for (r=0; r<m; r++) { /* Do row by row merging */
6090     b1   = rowBegin1[r];
6091     e1   = rowEnd1[r];
6092     b2   = rowBegin2[r];
6093     e2   = rowEnd2[r];
6094     while (b1 < e1 && b2 < e2) {
6095       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6096         j[t]      = j1[b1];
6097         imap1[t1] = t;
6098         imap2[t2] = t;
6099         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
6100         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6101         t1++; t2++; t++;
6102       } else if (j1[b1] < j2[b2]) {
6103         j[t]      = j1[b1];
6104         imap1[t1] = t;
6105         b1       += jmap1[t1+1] - jmap1[t1];
6106         t1++; t++;
6107       } else {
6108         j[t]      = j2[b2];
6109         imap2[t2] = t;
6110         b2       += jmap2[t2+1] - jmap2[t2];
6111         t2++; t++;
6112       }
6113     }
6114     /* Merge the remaining in either j1[] or j2[] */
6115     while (b1 < e1) {
6116       j[t]      = j1[b1];
6117       imap1[t1] = t;
6118       b1       += jmap1[t1+1] - jmap1[t1];
6119       t1++; t++;
6120     }
6121     while (b2 < e2) {
6122       j[t]      = j2[b2];
6123       imap2[t2] = t;
6124       b2       += jmap2[t2+1] - jmap2[t2];
6125       t2++; t++;
6126     }
6127     i[r+1] = t;
6128   }
6129   PetscFunctionReturn(0);
6130 }
6131 
6132 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block
6133 
6134   Input Parameters:
6135     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6136     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6137       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6138 
6139       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6140       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6141 
6142   Output Parameters:
6143     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6144     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6145       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6146       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6147 
6148     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6149       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6150         repeats (i.e., same 'i,j' pair).
6151       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6152         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6153 
6154       Atot: number of entries belonging to the diagonal block
6155       Annz: number of unique nonzeros belonging to the diagonal block.
6156 
6157     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6158 
6159     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with a single PetscMalloc4(). They must be freed with a single PetscFree4(), passing the pointers in the same order.
6160 */
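/* A tiny hypothetical example of the splitting (values are illustrative only). Suppose the diagonal
   block owns columns [cstart,cend) = [4,8) and one local row carries the n = 4 entries

     j[]    = {9,5,5,2}   perm[] = {0,1,2,3}

   After the routine, the row is sorted with the diagonal-block entries first, e.g.

     j[]    = {5,5,2,9}   perm[] = {1,2,3,0}
     [rowBegin[r],rowMid[r]) = [0,2)   and   [rowMid[r],rowEnd[r]) = [2,4)

   and the outputs contributed by this row are

     Atot = 2, Annz = 1, Ajmap[] = {0,2},   Aperm[] = {1,2}   (column 5, repeated twice)
     Btot = 2, Bnnz = 2, Bjmap[] = {0,1,2}, Bperm[] = {3,0}   (columns 2 and 9, once each)
*/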
6161 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
6162   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
6163   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
6164   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
6165 {
6166   PetscErrorCode    ierr;
6167   PetscInt          cstart,cend,rstart,rend,row,col;
6168   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6169   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6170   PetscCount        k,m,p,q,r,s,mid;
6171   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;
6172 
6173   PetscFunctionBegin;
6174   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6175   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6176   m    = rend - rstart;
6177 
6178   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */
6179 
6180   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6181      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6182   */
6183   while (k<n) {
6184     row = i[k];
6185     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6186     for (s=k; s<n; s++) if (i[s] != row) break;
6187     for (p=k; p<s; p++) {
6188       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6189       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
6190     }
6191     ierr = PetscSortIntWithCountArray(s-k,j+k,perm+k);CHKERRQ(ierr);
6192     ierr = PetscSortedIntUpperBound(j,k,s,-1,&mid);CHKERRQ(ierr); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6193     rowBegin[row-rstart] = k;
6194     rowMid[row-rstart]   = mid;
6195     rowEnd[row-rstart]   = s;
6196 
6197     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6198     Atot += mid - k;
6199     Btot += s - mid;
6200 
6201     /* Count unique nonzeros of this diag/offdiag row */
6202     for (p=k; p<mid;) {
6203       col = j[p];
6204       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
6205       Annz++;
6206     }
6207 
6208     for (p=mid; p<s;) {
6209       col = j[p];
6210       do {p++;} while (p<s && j[p] == col);
6211       Bnnz++;
6212     }
6213     k = s;
6214   }
6215 
6216   /* Allocation according to Atot, Btot, Annz, Bnnz */
6217   ierr = PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);CHKERRQ(ierr);
6218 
6219   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6220   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6221   for (r=0; r<m; r++) {
6222     k     = rowBegin[r];
6223     mid   = rowMid[r];
6224     s     = rowEnd[r];
6225     ierr  = PetscArraycpy(Aperm+Atot,perm+k,  mid-k);CHKERRQ(ierr);
6226     ierr  = PetscArraycpy(Bperm+Btot,perm+mid,s-mid);CHKERRQ(ierr);
6227     Atot += mid - k;
6228     Btot += s - mid;
6229 
6230     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6231     for (p=k; p<mid;) {
6232       col = j[p];
6233       q   = p;
6234       do {p++;} while (p<mid && j[p] == col);
6235       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6236       Annz++;
6237     }
6238 
6239     for (p=mid; p<s;) {
6240       col = j[p];
6241       q   = p;
6242       do {p++;} while (p<s && j[p] == col);
6243       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6244       Bnnz++;
6245     }
6246   }
6247   /* Output */
6248   *Aperm_ = Aperm;
6249   *Annz_  = Annz;
6250   *Atot_  = Atot;
6251   *Ajmap_ = Ajmap;
6252   *Bperm_ = Bperm;
6253   *Bnnz_  = Bnnz;
6254   *Btot_  = Btot;
6255   *Bjmap_ = Bjmap;
6256   PetscFunctionReturn(0);
6257 }
6258 
6259 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6260 {
6261   PetscErrorCode            ierr;
6262   MPI_Comm                  comm;
6263   PetscMPIInt               rank,size;
6264   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6265   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6266   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;
6267 
6268   PetscFunctionBegin;
6269   ierr = PetscFree(mpiaij->garray);CHKERRQ(ierr);
6270   ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr);
6271 #if defined(PETSC_USE_CTABLE)
6272   ierr = PetscTableDestroy(&mpiaij->colmap);CHKERRQ(ierr);
6273 #else
6274   ierr = PetscFree(mpiaij->colmap);CHKERRQ(ierr);
6275 #endif
6276   ierr = VecScatterDestroy(&mpiaij->Mvctx);CHKERRQ(ierr);
6277   mat->assembled = PETSC_FALSE;
6278   mat->was_assembled = PETSC_FALSE;
6279   ierr = MatResetPreallocationCOO_MPIAIJ(mat);CHKERRQ(ierr);
6280 
6281   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
6282   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
6283   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
6284   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
6285   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
6286   ierr = PetscLayoutGetRange(mat->rmap,&rstart,&rend);CHKERRQ(ierr);
6287   ierr = PetscLayoutGetRange(mat->cmap,&cstart,&cend);CHKERRQ(ierr);
6288   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
6289   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
6290 
6291   /* ---------------------------------------------------------------------------*/
6292   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6293   /* entries come first, then local rows, then remote rows.                     */
6294   /* ---------------------------------------------------------------------------*/
6295   PetscCount n1 = coo_n,*perm1;
6296   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6297   ierr = PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);CHKERRQ(ierr);
6298   ierr = PetscArraycpy(i1,coo_i,n1);CHKERRQ(ierr); /* Make a copy since we'll modify it */
6299   ierr = PetscArraycpy(j1,coo_j,n1);CHKERRQ(ierr);
6300   for (k=0; k<n1; k++) perm1[k] = k;
6301 
6302   /* Manipulate indices so that entries with negative row or col indices will have smallest
6303      row indices, local entries will have greater but negative row indices, and remote entries
6304      will have positive row indices.
6305   */
6306   for (k=0; k<n1; k++) {
6307     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6308     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6309     else if (mat->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6310     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore off-process entries as if they had negative indices */
6311   }
6312 
6313   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6314   ierr = PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);CHKERRQ(ierr);
6315   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6316   ierr = PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem);CHKERRQ(ierr); /* rem is upper bound of the last local row */
6317   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/
6318 
6319   /* ---------------------------------------------------------------------------*/
6320   /*           Split local rows into diag/offdiag portions                      */
6321   /* ---------------------------------------------------------------------------*/
6322   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6323   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6324   PetscCount   Annz1,Bnnz1,Atot1,Btot1;
6325 
6326   ierr = PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);CHKERRQ(ierr);
6327   ierr = PetscMalloc1(n1-rem,&Cperm1);CHKERRQ(ierr);
6328   ierr = MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);CHKERRQ(ierr);
6329 
6330   /* ---------------------------------------------------------------------------*/
6331   /*           Send remote rows to their owner                                  */
6332   /* ---------------------------------------------------------------------------*/
6333   /* Find which rows should be sent to which remote ranks*/
6334   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6335   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6336   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6337   const PetscInt *ranges;
6338   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6339 
6340   ierr = PetscLayoutGetRanges(mat->rmap,&ranges);CHKERRQ(ierr);
6341   ierr = PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);CHKERRQ(ierr);
6342   for (k=rem; k<n1;) {
6343     PetscMPIInt  owner;
6344     PetscInt     firstRow,lastRow;
6345 
6346     /* Locate a row range */
6347     firstRow = i1[k]; /* first row of this owner */
6348     ierr     = PetscLayoutFindOwner(mat->rmap,firstRow,&owner);CHKERRQ(ierr);
6349     lastRow  = ranges[owner+1]-1; /* last row of this owner */
6350 
6351     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6352     ierr = PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);CHKERRQ(ierr);
6353 
6354     /* All entries in [k,p) belong to this remote owner */
6355     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6356       PetscMPIInt *sendto2;
6357       PetscInt    *nentries2;
6358       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6359 
6360       ierr = PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);CHKERRQ(ierr);
6361       ierr = PetscArraycpy(sendto2,sendto,maxNsend);CHKERRQ(ierr);
6362       ierr = PetscArraycpy(nentries2,nentries,maxNsend);CHKERRQ(ierr);
6363       ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6364       sendto      = sendto2;
6365       nentries    = nentries2;
6366       maxNsend    = maxNsend2;
6367     }
6368     sendto[nsend]   = owner;
6369     nentries[nsend] = p - k;
6370     ierr = PetscCountCast(p-k,&nentries[nsend]);CHKERRQ(ierr);
6371     nsend++;
6372     k = p;
6373   }
6374 
6375   /* Build 1st SF to know offsets on remote to send data */
6376   PetscSF     sf1;
6377   PetscInt    nroots = 1,nroots2 = 0;
6378   PetscInt    nleaves = nsend,nleaves2 = 0;
6379   PetscInt    *offsets;
6380   PetscSFNode *iremote;
6381 
6382   ierr = PetscSFCreate(comm,&sf1);CHKERRQ(ierr);
6383   ierr = PetscMalloc1(nsend,&iremote);CHKERRQ(ierr);
6384   ierr = PetscMalloc1(nsend,&offsets);CHKERRQ(ierr);
6385   for (k=0; k<nsend; k++) {
6386     iremote[k].rank  = sendto[k];
6387     iremote[k].index = 0;
6388     nleaves2        += nentries[k];
6389     PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6390   }
6391   ierr = PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6392   ierr = PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);CHKERRQ(ierr);
6393   ierr = PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM);CHKERRQ(ierr); /* If nroots2 overflowed, the offsets[] check below would catch it */
6394   ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);
6395   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);
6396 
6397   /* Build 2nd SF to send remote COOs to their owner */
6398   PetscSF sf2;
6399   nroots  = nroots2;
6400   nleaves = nleaves2;
6401   ierr    = PetscSFCreate(comm,&sf2);CHKERRQ(ierr);
6402   ierr    = PetscSFSetFromOptions(sf2);CHKERRQ(ierr);
6403   ierr    = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
6404   p       = 0;
6405   for (k=0; k<nsend; k++) {
6406     PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6407     for (q=0; q<nentries[k]; q++,p++) {
6408       iremote[p].rank  = sendto[k];
6409       iremote[p].index = offsets[k] + q;
6410     }
6411   }
6412   ierr = PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
6413 
6414   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6415   ierr = PetscArraycpy(Cperm1,perm1+rem,n1-rem);CHKERRQ(ierr);
6416 
6417   /* Send the remote COOs to their owner */
6418   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6419   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6420   ierr = PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);CHKERRQ(ierr);
6421   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);CHKERRQ(ierr);
6422   ierr = PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);CHKERRQ(ierr);
6423   ierr = PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);CHKERRQ(ierr);
6424   ierr = PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);CHKERRQ(ierr);
6425 
6426   ierr = PetscFree(offsets);CHKERRQ(ierr);
6427   ierr = PetscFree2(sendto,nentries);CHKERRQ(ierr);
6428 
6429   /* ---------------------------------------------------------------*/
6430   /* Sort received COOs by row along with the permutation array     */
6431   /* ---------------------------------------------------------------*/
6432   for (k=0; k<n2; k++) perm2[k] = k;
6433   ierr = PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);CHKERRQ(ierr);
6434 
6435   /* ---------------------------------------------------------------*/
6436   /* Split received COOs into diag/offdiag portions                 */
6437   /* ---------------------------------------------------------------*/
6438   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6439   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6440   PetscCount  Annz2,Bnnz2,Atot2,Btot2;
6441 
6442   ierr = PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);CHKERRQ(ierr);
6443   ierr = MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);CHKERRQ(ierr);
6444 
6445   /* --------------------------------------------------------------------------*/
6446   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6447   /* --------------------------------------------------------------------------*/
6448   PetscInt   *Ai,*Bi;
6449   PetscInt   *Aj,*Bj;
6450 
6451   ierr = PetscMalloc1(m+1,&Ai);CHKERRQ(ierr);
6452   ierr = PetscMalloc1(m+1,&Bi);CHKERRQ(ierr);
6453   ierr = PetscMalloc1(Annz1+Annz2,&Aj);CHKERRQ(ierr); /* Since local and remote entries might have dups, we might allocate excess memory */
6454   ierr = PetscMalloc1(Bnnz1+Bnnz2,&Bj);CHKERRQ(ierr);
6455 
6456   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6457   ierr = PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);CHKERRQ(ierr);
6458 
6459   ierr = MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);CHKERRQ(ierr);
6460   ierr = MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);CHKERRQ(ierr);
6461   ierr = PetscFree3(rowBegin1,rowMid1,rowEnd1);CHKERRQ(ierr);
6462   ierr = PetscFree3(rowBegin2,rowMid2,rowEnd2);CHKERRQ(ierr);
6463   ierr = PetscFree3(i1,j1,perm1);CHKERRQ(ierr);
6464   ierr = PetscFree3(i2,j2,perm2);CHKERRQ(ierr);
6465 
6466   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6467   PetscInt Annz = Ai[m];
6468   PetscInt Bnnz = Bi[m];
6469   if (Annz < Annz1 + Annz2) {
6470     PetscInt *Aj_new;
6471     ierr = PetscMalloc1(Annz,&Aj_new);CHKERRQ(ierr);
6472     ierr = PetscArraycpy(Aj_new,Aj,Annz);CHKERRQ(ierr);
6473     ierr = PetscFree(Aj);CHKERRQ(ierr);
6474     Aj   = Aj_new;
6475   }
6476 
6477   if (Bnnz < Bnnz1 + Bnnz2) {
6478     PetscInt *Bj_new;
6479     ierr = PetscMalloc1(Bnnz,&Bj_new);CHKERRQ(ierr);
6480     ierr = PetscArraycpy(Bj_new,Bj,Bnnz);CHKERRQ(ierr);
6481     ierr = PetscFree(Bj);CHKERRQ(ierr);
6482     Bj   = Bj_new;
6483   }
6484 
6485   /* --------------------------------------------------------------------------------*/
6486   /* Create new submatrices for on-process and off-process coupling                  */
6487   /* --------------------------------------------------------------------------------*/
6488   PetscScalar   *Aa,*Ba;
6489   MatType       rtype;
6490   Mat_SeqAIJ    *a,*b;
6491   ierr = PetscCalloc1(Annz,&Aa);CHKERRQ(ierr); /* Matrix values start as zero */
6492   ierr = PetscCalloc1(Bnnz,&Ba);CHKERRQ(ierr);
6493   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6494   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6495   ierr = MatDestroy(&mpiaij->A);CHKERRQ(ierr);
6496   ierr = MatDestroy(&mpiaij->B);CHKERRQ(ierr);
6497   ierr = MatGetRootType_Private(mat,&rtype);CHKERRQ(ierr);
6498   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A);CHKERRQ(ierr);
6499   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B);CHKERRQ(ierr);
6500   ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
6501 
6502   a = (Mat_SeqAIJ*)mpiaij->A->data;
6503   b = (Mat_SeqAIJ*)mpiaij->B->data;
6504   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6505   a->free_a       = b->free_a       = PETSC_TRUE;
6506   a->free_ij      = b->free_ij      = PETSC_TRUE;
6507 
6508   /* conversion must happen AFTER multiply setup */
6509   ierr = MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A);CHKERRQ(ierr);
6510   ierr = MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B);CHKERRQ(ierr);
6511   ierr = VecDestroy(&mpiaij->lvec);CHKERRQ(ierr);
6512   ierr = MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL);CHKERRQ(ierr);
6513   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec);CHKERRQ(ierr);
6514 
6515   mpiaij->coo_n   = coo_n;
6516   mpiaij->coo_sf  = sf2;
6517   mpiaij->sendlen = nleaves;
6518   mpiaij->recvlen = nroots;
6519 
6520   mpiaij->Annz1   = Annz1;
6521   mpiaij->Annz2   = Annz2;
6522   mpiaij->Bnnz1   = Bnnz1;
6523   mpiaij->Bnnz2   = Bnnz2;
6524 
6525   mpiaij->Atot1   = Atot1;
6526   mpiaij->Atot2   = Atot2;
6527   mpiaij->Btot1   = Btot1;
6528   mpiaij->Btot2   = Btot2;
6529 
6530   mpiaij->Aimap1  = Aimap1;
6531   mpiaij->Aimap2  = Aimap2;
6532   mpiaij->Bimap1  = Bimap1;
6533   mpiaij->Bimap2  = Bimap2;
6534 
6535   mpiaij->Ajmap1  = Ajmap1;
6536   mpiaij->Ajmap2  = Ajmap2;
6537   mpiaij->Bjmap1  = Bjmap1;
6538   mpiaij->Bjmap2  = Bjmap2;
6539 
6540   mpiaij->Aperm1  = Aperm1;
6541   mpiaij->Aperm2  = Aperm2;
6542   mpiaij->Bperm1  = Bperm1;
6543   mpiaij->Bperm2  = Bperm2;
6544 
6545   mpiaij->Cperm1  = Cperm1;
6546 
6547   /* Allocate send/recv buffers at preallocation time. If they are not used, their cost on the host is negligible */
6548   ierr = PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);CHKERRQ(ierr);
6549   PetscFunctionReturn(0);
6550 }
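/* A minimal caller-side sketch of the COO assembly path implemented above (array contents are hypothetical,
   error checking abbreviated). MatSetPreallocationCOO() and MatSetValuesCOO() dispatch to the _MPIAIJ routines
   through the "MatSetPreallocationCOO_C" and "MatSetValuesCOO_C" composed functions registered in MatCreate_MPIAIJ().

     PetscInt    coo_i[] = {0,0,2};        // global row indices; repeated (i,j) pairs are summed
     PetscInt    coo_j[] = {1,1,5};        // global column indices
     PetscScalar v[]     = {1.0,2.0,3.0};

     ierr = MatSetPreallocationCOO(A,3,coo_i,coo_j);CHKERRQ(ierr);
     ierr = MatSetValuesCOO(A,v,ADD_VALUES);CHKERRQ(ierr);
*/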
6551 
6552 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6553 {
6554   PetscErrorCode       ierr;
6555   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6556   Mat                  A = mpiaij->A,B = mpiaij->B;
6557   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6558   PetscScalar          *Aa,*Ba;
6559   PetscScalar          *sendbuf = mpiaij->sendbuf;
6560   PetscScalar          *recvbuf = mpiaij->recvbuf;
6561   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6562   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6563   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6564   const PetscCount     *Cperm1 = mpiaij->Cperm1;
6565 
6566   PetscFunctionBegin;
6567   ierr = MatSeqAIJGetArray(A,&Aa);CHKERRQ(ierr); /* Might read and write matrix values */
6568   ierr = MatSeqAIJGetArray(B,&Ba);CHKERRQ(ierr);
6569   if (imode == INSERT_VALUES) {
6570     ierr = PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6571     ierr = PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));CHKERRQ(ierr);
6572   }
6573 
6574   /* Pack entries to be sent to remote */
6575   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6576 
6577   /* Send remote entries to their owner and overlap the communication with local computation */
6578   ierr = PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6579   /* Add local entries to A and B */
6580   for (PetscCount i=0; i<Annz1; i++) {
6581     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6582   }
6583   for (PetscCount i=0; i<Bnnz1; i++) {
6584     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6585   }
6586   ierr = PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);CHKERRQ(ierr);
6587 
6588   /* Add received remote entries to A and B */
6589   for (PetscCount i=0; i<Annz2; i++) {
6590     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6591   }
6592   for (PetscCount i=0; i<Bnnz2; i++) {
6593     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6594   }
6595   ierr = MatSeqAIJRestoreArray(A,&Aa);CHKERRQ(ierr);
6596   ierr = MatSeqAIJRestoreArray(B,&Ba);CHKERRQ(ierr);
6597   PetscFunctionReturn(0);
6598 }
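/* To illustrate the accumulation above with hypothetical arrays: if Annz1 = 2, Ajmap1[] = {0,2,3},
   Aimap1[] = {0,4} and Aperm1[] = {7,9,1}, then the local loop performs

     Aa[0] += v[7] + v[9];   // first unique diag nonzero, built from two repeated COO entries
     Aa[4] += v[1];          // second unique diag nonzero, a single COO entry

   The *2 arrays drive the same pattern for entries received from other processes, reading from recvbuf[]. */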
6599 
6600 /* ----------------------------------------------------------------*/
6601 
6602 /*MC
6603    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6604 
6605    Options Database Keys:
6606 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6607 
6608    Level: beginner
6609 
6610    Notes:
6611     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6612     in this case the values associated with the rows and columns one passes in are set to zero
6613     in the matrix.
6614 
6615     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6616     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
6617 
6618 .seealso: MatCreateAIJ()
6619 M*/
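/* A typical creation sequence (a sketch; sizes and preallocation numbers are placeholders):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);                          // or -mat_type mpiaij with MatSetFromOptions()
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/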
6620 
6621 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6622 {
6623   Mat_MPIAIJ     *b;
6624   PetscErrorCode ierr;
6625   PetscMPIInt    size;
6626 
6627   PetscFunctionBegin;
6628   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6629 
6630   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6631   B->data       = (void*)b;
6632   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6633   B->assembled  = PETSC_FALSE;
6634   B->insertmode = NOT_SET_VALUES;
6635   b->size       = size;
6636 
6637   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6638 
6639   /* build cache for off array entries formed */
6640   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6641 
6642   b->donotstash  = PETSC_FALSE;
6643   b->colmap      = NULL;
6644   b->garray      = NULL;
6645   b->roworiented = PETSC_TRUE;
6646 
6647   /* stuff used for matrix vector multiply */
6648   b->lvec  = NULL;
6649   b->Mvctx = NULL;
6650 
6651   /* stuff for MatGetRow() */
6652   b->rowindices   = NULL;
6653   b->rowvalues    = NULL;
6654   b->getrowactive = PETSC_FALSE;
6655 
6656   /* flexible pointer used in CUSPARSE classes */
6657   b->spptr = NULL;
6658 
6659   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6660   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6661   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6662   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6663   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6664   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6665   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6666   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6667   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6668   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6669 #if defined(PETSC_HAVE_CUDA)
6670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6671 #endif
6672 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6674 #endif
6675 #if defined(PETSC_HAVE_MKL_SPARSE)
6676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6677 #endif
6678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6682 #if defined(PETSC_HAVE_ELEMENTAL)
6683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6684 #endif
6685 #if defined(PETSC_HAVE_SCALAPACK)
6686   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6687 #endif
6688   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6689   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6690 #if defined(PETSC_HAVE_HYPRE)
6691   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6692   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6693 #endif
6694   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6695   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6696   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);CHKERRQ(ierr);
6697   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);CHKERRQ(ierr);
6698   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6699   PetscFunctionReturn(0);
6700 }
6701 
6702 /*@C
6703      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6704          and "off-diagonal" part of the matrix in CSR format.
6705 
6706    Collective
6707 
6708    Input Parameters:
6709 +  comm - MPI communicator
6710 .  m - number of local rows (Cannot be PETSC_DECIDE)
6711 .  n - This value should be the same as the local size used in creating the
6712        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
6713        calculated if N is given) For square matrices n is almost always m.
6714 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6715 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6716 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6717 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6718 .   a - matrix values
6719 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6720 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6721 -   oa - matrix values
6722 
6723    Output Parameter:
6724 .   mat - the matrix
6725 
6726    Level: advanced
6727 
6728    Notes:
6729        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6730        must free the arrays once the matrix has been destroyed and not before.
6731 
6732        The i and j indices are 0 based
6733 
6734        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6735 
6736        This sets local rows and cannot be used to set off-processor values.
6737 
6738        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6739        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6740        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6741        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6742        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6743        communication if it is known that only local entries will be set.
6744 
6745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6746           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6747 @*/
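/* An illustrative call for rank 0 of a 2-process run with a 4x4 matrix split as 2 local rows and
   2 local columns per rank (values and sparsity pattern are hypothetical):

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};  PetscScalar a[]  = {1.0,2.0};  // diagonal block, local column indices
     PetscInt    oi[] = {0,1,1},  oj[] = {2};    PetscScalar oa[] = {5.0};      // off-diagonal block, global column indices

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   The routine is collective, so every rank calls it with its own arrays, and the arrays must stay
   valid until the matrix is destroyed since they are not copied. */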
6748 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6749 {
6750   PetscErrorCode ierr;
6751   Mat_MPIAIJ     *maij;
6752 
6753   PetscFunctionBegin;
6754   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6755   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6756   PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6757   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6758   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6759   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6760   maij = (Mat_MPIAIJ*) (*mat)->data;
6761 
6762   (*mat)->preallocated = PETSC_TRUE;
6763 
6764   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6765   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6766 
6767   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6768   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6769 
6770   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6771   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6772   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6773   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6774   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6775   PetscFunctionReturn(0);
6776 }
6777 
6778 typedef struct {
6779   Mat       *mp;    /* intermediate products */
6780   PetscBool *mptmp; /* is the intermediate product temporary ? */
6781   PetscInt  cp;     /* number of intermediate products */
6782 
6783   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6784   PetscInt    *startsj_s,*startsj_r;
6785   PetscScalar *bufa;
6786   Mat         P_oth;
6787 
6788   /* may take advantage of merging product->B */
6789   Mat Bloc; /* B-local by merging diag and off-diag */
6790 
6791   /* cusparse does not support splitting the symbolic and numeric phases.
6792      When api_user is true, we do not need to update the numerical values
6793      of the temporary storage */
6794   PetscBool reusesym;
6795 
6796   /* support for COO values insertion */
6797   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars; also used as MPI recv/send buffers, respectively */
6798   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6799   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6800   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6801   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6802   PetscMemType mtype;
6803 
6804   /* customization */
6805   PetscBool abmerge;
6806   PetscBool P_oth_bind;
6807 } MatMatMPIAIJBACKEND;
6808 
6809 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6810 {
6811   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6812   PetscInt            i;
6813   PetscErrorCode      ierr;
6814 
6815   PetscFunctionBegin;
6816   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6817   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6818   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6819   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6820   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6821   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6822   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6823   for (i = 0; i < mmdata->cp; i++) {
6824     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6825   }
6826   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6827   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6828   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6829   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6830   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6831   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6832   PetscFunctionReturn(0);
6833 }
6834 
6835 /* Copy selected n entries with indices in idx[] of A to v[].
6836    If idx is NULL, copy the whole data array of A to v[]
6837  */
6838 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6839 {
6840   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6841   PetscErrorCode ierr;
6842 
6843   PetscFunctionBegin;
6844   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6845   if (f) {
6846     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6847   } else {
6848     const PetscScalar *vv;
6849 
6850     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6851     if (n && idx) {
6852       PetscScalar    *w = v;
6853       const PetscInt *oi = idx;
6854       PetscInt       j;
6855 
6856       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6857     } else {
6858       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6859     }
6860     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6861   }
6862   PetscFunctionReturn(0);
6863 }
6864 
6865 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6866 {
6867   MatMatMPIAIJBACKEND *mmdata;
6868   PetscInt            i,n_d,n_o;
6869   PetscErrorCode      ierr;
6870 
6871   PetscFunctionBegin;
6872   MatCheckProduct(C,1);
6873   PetscCheckFalse(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6874   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6875   if (!mmdata->reusesym) { /* update temporary matrices */
6876     if (mmdata->P_oth) {
6877       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6878     }
6879     if (mmdata->Bloc) {
6880       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6881     }
6882   }
6883   mmdata->reusesym = PETSC_FALSE;
6884 
6885   for (i = 0; i < mmdata->cp; i++) {
6886     PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6887     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6888   }
6889   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6890     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6891 
6892     if (mmdata->mptmp[i]) continue;
6893     if (noff) {
6894       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6895 
6896       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6897       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6898       n_o += noff;
6899       n_d += nown;
6900     } else {
6901       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6902 
6903       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6904       n_d += mm->nz;
6905     }
6906   }
6907   if (mmdata->hasoffproc) { /* offprocess insertion */
6908     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6909     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6910   }
6911   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6912   PetscFunctionReturn(0);
6913 }
6914 
6915 /* Support for Pt * A, A * P, or Pt * A * P */
6916 #define MAX_NUMBER_INTERMEDIATE 4
6917 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6918 {
6919   Mat_Product            *product = C->product;
6920   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6921   Mat_MPIAIJ             *a,*p;
6922   MatMatMPIAIJBACKEND    *mmdata;
6923   ISLocalToGlobalMapping P_oth_l2g = NULL;
6924   IS                     glob = NULL;
6925   const char             *prefix;
6926   char                   pprefix[256];
6927   const PetscInt         *globidx,*P_oth_idx;
6928   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6929   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6930   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6931                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6932                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6933   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6934 
6935   MatProductType         ptype;
6936   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6937   PetscMPIInt            size;
6938   PetscErrorCode         ierr;
6939 
6940   PetscFunctionBegin;
6941   MatCheckProduct(C,1);
6942   PetscCheckFalse(product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6943   ptype = product->type;
6944   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6945     ptype = MATPRODUCT_AB;
6946     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6947   }
6948   switch (ptype) {
6949   case MATPRODUCT_AB:
6950     A = product->A;
6951     P = product->B;
6952     m = A->rmap->n;
6953     n = P->cmap->n;
6954     M = A->rmap->N;
6955     N = P->cmap->N;
6956     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6957     break;
6958   case MATPRODUCT_AtB:
6959     P = product->A;
6960     A = product->B;
6961     m = P->cmap->n;
6962     n = A->cmap->n;
6963     M = P->cmap->N;
6964     N = A->cmap->N;
6965     hasoffproc = PETSC_TRUE;
6966     break;
6967   case MATPRODUCT_PtAP:
6968     A = product->A;
6969     P = product->B;
6970     m = P->cmap->n;
6971     n = P->cmap->n;
6972     M = P->cmap->N;
6973     N = P->cmap->N;
6974     hasoffproc = PETSC_TRUE;
6975     break;
6976   default:
6977     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6978   }
6979   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6980   if (size == 1) hasoffproc = PETSC_FALSE;
6981 
6982   /* defaults */
6983   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6984     mp[i]    = NULL;
6985     mptmp[i] = PETSC_FALSE;
6986     rmapt[i] = -1;
6987     cmapt[i] = -1;
6988     rmapa[i] = NULL;
6989     cmapa[i] = NULL;
6990   }
6991 
6992   /* customization */
6993   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6994   mmdata->reusesym = product->api_user;
6995   if (ptype == MATPRODUCT_AB) {
6996     if (product->api_user) {
6997       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6998       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6999       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7000       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7001     } else {
7002       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7003       ierr = PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
7004       ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7005       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7006     }
7007   } else if (ptype == MATPRODUCT_PtAP) {
7008     if (product->api_user) {
7009       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7010       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7011       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7012     } else {
7013       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7014       ierr = PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
7015       ierr = PetscOptionsEnd();CHKERRQ(ierr);
7016     }
7017   }
7018   a = (Mat_MPIAIJ*)A->data;
7019   p = (Mat_MPIAIJ*)P->data;
7020   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
7021   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
7022   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
7023   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
7024   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
7025 
7026   cp   = 0;
7027   switch (ptype) {
7028   case MATPRODUCT_AB: /* A * P */
7029     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
7030 
7031     /* A_diag * P_local (merged or not) */
7032     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7033       /* P is product->B */
7034       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7035       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7036       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7037       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7038       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7039       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7040       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7041       mp[cp]->product->api_user = product->api_user;
7042       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7043       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7044       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7045       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7046       rmapt[cp] = 1;
7047       cmapt[cp] = 2;
7048       cmapa[cp] = globidx;
7049       mptmp[cp] = PETSC_FALSE;
7050       cp++;
7051     } else { /* A_diag * P_diag and A_diag * P_off */
7052       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
7053       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7054       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7055       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7056       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7057       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7058       mp[cp]->product->api_user = product->api_user;
7059       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7060       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7061       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7062       rmapt[cp] = 1;
7063       cmapt[cp] = 1;
7064       mptmp[cp] = PETSC_FALSE;
7065       cp++;
7066       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
7067       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7068       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7069       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7070       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7071       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7072       mp[cp]->product->api_user = product->api_user;
7073       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7074       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7075       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7076       rmapt[cp] = 1;
7077       cmapt[cp] = 2;
7078       cmapa[cp] = p->garray;
7079       mptmp[cp] = PETSC_FALSE;
7080       cp++;
7081     }
7082 
7083     /* A_off * P_other */
7084     if (mmdata->P_oth) {
7085       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
7086       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7087       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
7088       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
7089       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
7090       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7091       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7092       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7093       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7094       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7095       mp[cp]->product->api_user = product->api_user;
7096       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7097       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7098       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7099       rmapt[cp] = 1;
7100       cmapt[cp] = 2;
7101       cmapa[cp] = P_oth_idx;
7102       mptmp[cp] = PETSC_FALSE;
7103       cp++;
7104     }
7105     break;
7106 
7107   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7108     /* A is product->B */
7109     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7110     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7111       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7112       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7113       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7114       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7115       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7116       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7117       mp[cp]->product->api_user = product->api_user;
7118       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7119       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7120       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7121       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7122       rmapt[cp] = 2;
7123       rmapa[cp] = globidx;
7124       cmapt[cp] = 2;
7125       cmapa[cp] = globidx;
7126       mptmp[cp] = PETSC_FALSE;
7127       cp++;
7128     } else {
7129       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7130       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7131       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7132       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7133       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7134       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7135       mp[cp]->product->api_user = product->api_user;
7136       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7137       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7138       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7139       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7140       rmapt[cp] = 1;
7141       cmapt[cp] = 2;
7142       cmapa[cp] = globidx;
7143       mptmp[cp] = PETSC_FALSE;
7144       cp++;
7145       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7146       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7147       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7148       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7149       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7150       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7151       mp[cp]->product->api_user = product->api_user;
7152       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7153       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7154       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7155       rmapt[cp] = 2;
7156       rmapa[cp] = p->garray;
7157       cmapt[cp] = 2;
7158       cmapa[cp] = globidx;
7159       mptmp[cp] = PETSC_FALSE;
7160       cp++;
7161     }
7162     break;
7163   case MATPRODUCT_PtAP:
7164     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
7165     /* P is product->B */
7166     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
7167     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
7168     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
7169     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7170     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7171     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7172     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7173     mp[cp]->product->api_user = product->api_user;
7174     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7175     PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7176     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7177     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
7178     rmapt[cp] = 2;
7179     rmapa[cp] = globidx;
7180     cmapt[cp] = 2;
7181     cmapa[cp] = globidx;
7182     mptmp[cp] = PETSC_FALSE;
7183     cp++;
7184     if (mmdata->P_oth) {
7185       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
7186       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7187       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
7188       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
7189       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
7190       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
7191       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7192       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7193       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7194       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7195       mp[cp]->product->api_user = product->api_user;
7196       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7197       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7198       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7199       mptmp[cp] = PETSC_TRUE;
7200       cp++;
7201       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
7202       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
7203       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
7204       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);CHKERRQ(ierr);
7205       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
7206       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
7207       mp[cp]->product->api_user = product->api_user;
7208       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
7209       PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
7210       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
7211       rmapt[cp] = 2;
7212       rmapa[cp] = globidx;
7213       cmapt[cp] = 2;
7214       cmapa[cp] = P_oth_idx;
7215       mptmp[cp] = PETSC_FALSE;
7216       cp++;
7217     }
7218     break;
7219   default:
7220     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
7221   }
7222   /* sanity check */
7223   if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);
7224 
7225   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
7226   for (i = 0; i < cp; i++) {
7227     mmdata->mp[i]    = mp[i];
7228     mmdata->mptmp[i] = mptmp[i];
7229   }
7230   mmdata->cp = cp;
7231   C->product->data       = mmdata;
7232   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7233   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7234 
7235   /* memory type */
7236   mmdata->mtype = PETSC_MEMTYPE_HOST;
7237   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
7238   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
7239   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7240   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7241 
7242   /* prepare COO coordinates for value insertion */
7243 
7244   /* count the total number of nonzeros of the intermediate SeqAIJ matrices
7245     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7246     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
7247     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7248   */
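  /* For illustration (hypothetical numbers, not taken from any particular run): if this process owns rows
     [rs,re) = [100,200) of C and an intermediate product mp[cp] has rmapt[cp] == 2 with
     rmapa[cp] = {5,120,350}, then the nonzeros of the rows mapping to 5 and 350 are counted in ncoo_o
     (they must be sent to their owning procs), the nonzeros of the row mapping to 120 are counted in
     ncoo_oown, and any non-temporary product with rmapt[cp] == 1 contributes all of its nonzeros to ncoo_d. */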
7249   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7250     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7251     if (mptmp[cp]) continue;
7252     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7253       const PetscInt *rmap = rmapa[cp];
7254       const PetscInt mr = mp[cp]->rmap->n;
7255       const PetscInt rs = C->rmap->rstart;
7256       const PetscInt re = C->rmap->rend;
7257       const PetscInt *ii  = mm->i;
7258       for (i = 0; i < mr; i++) {
7259         const PetscInt gr = rmap[i];
7260         const PetscInt nz = ii[i+1] - ii[i];
7261         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7262         else ncoo_oown += nz; /* this row is local */
7263       }
7264     } else ncoo_d += mm->nz;
7265   }
7266 
7267   /*
7268     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7269 
7270     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7271 
7272     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7273 
7274     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
7275     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7276     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7277 
7278     coo_i/j/v[]: arrays of length ncoo storing the row/col/val of nonzeros belonging to this proc.
7279     E.g. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7280   */
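  /* Illustration (hypothetical sizes, not taken from any particular run): with ncoo_d = 4, ncoo_oown = 2 and
     ncoo2 = 3, the arrays have length ncoo = 9 and are laid out as

       coo_i[] = [ d d d d | oown oown | recv recv recv ]

     i.e. the first ncoo_d + ncoo_oown entries are produced by this process and the trailing ncoo2 entries are
     gathered from other processes through mmdata->sf. */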
7281   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a CSR-like data structure */
7282   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
7283 
7284   /* gather (i,j) of nonzeros inserted by remote procs */
7285   if (hasoffproc) {
7286     PetscSF  msf;
7287     PetscInt ncoo2,*coo_i2,*coo_j2;
7288 
7289     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
7290     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
7291     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
7292 
7293     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7294       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7295       PetscInt   *idxoff = mmdata->off[cp];
7296       PetscInt   *idxown = mmdata->own[cp];
7297       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7298         const PetscInt *rmap = rmapa[cp];
7299         const PetscInt *cmap = cmapa[cp];
7300         const PetscInt *ii  = mm->i;
7301         PetscInt       *coi = coo_i + ncoo_o;
7302         PetscInt       *coj = coo_j + ncoo_o;
7303         const PetscInt mr = mp[cp]->rmap->n;
7304         const PetscInt rs = C->rmap->rstart;
7305         const PetscInt re = C->rmap->rend;
7306         const PetscInt cs = C->cmap->rstart;
7307         for (i = 0; i < mr; i++) {
7308           const PetscInt *jj = mm->j + ii[i];
7309           const PetscInt gr  = rmap[i];
7310           const PetscInt nz  = ii[i+1] - ii[i];
7311           if (gr < rs || gr >= re) { /* this is an offproc row */
7312             for (j = ii[i]; j < ii[i+1]; j++) {
7313               *coi++ = gr;
7314               *idxoff++ = j;
7315             }
7316             if (!cmapt[cp]) { /* already global */
7317               for (j = 0; j < nz; j++) *coj++ = jj[j];
7318             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7319               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7320             } else { /* type-2, local to global for sparse columns */
7321               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7322             }
7323             ncoo_o += nz;
7324           } else { /* this is a local row */
7325             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7326           }
7327         }
7328       }
7329       mmdata->off[cp + 1] = idxoff;
7330       mmdata->own[cp + 1] = idxown;
7331     }
7332 
7333     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
7334     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
7335     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
7336     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
7337     ncoo = ncoo_d + ncoo_oown + ncoo2;
7338     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
7339     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at the back */
7340     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7341     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7342     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
7343     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
7344     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7345     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
7346     coo_i = coo_i2;
7347     coo_j = coo_j2;
7348   } else { /* no offproc values insertion */
7349     ncoo = ncoo_d;
7350     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
7351 
7352     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
7353     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
7354     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
7355   }
7356   mmdata->hasoffproc = hasoffproc;
7357 
7358   /* gather (i,j) of nonzeros inserted locally */
7359   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7360     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7361     PetscInt       *coi = coo_i + ncoo_d;
7362     PetscInt       *coj = coo_j + ncoo_d;
7363     const PetscInt *jj  = mm->j;
7364     const PetscInt *ii  = mm->i;
7365     const PetscInt *cmap = cmapa[cp];
7366     const PetscInt *rmap = rmapa[cp];
7367     const PetscInt mr = mp[cp]->rmap->n;
7368     const PetscInt rs = C->rmap->rstart;
7369     const PetscInt re = C->rmap->rend;
7370     const PetscInt cs = C->cmap->rstart;
7371 
7372     if (mptmp[cp]) continue;
7373     if (rmapt[cp] == 1) { /* consecutive rows */
7374       /* fill coo_i */
7375       for (i = 0; i < mr; i++) {
7376         const PetscInt gr = i + rs;
7377         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7378       }
7379       /* fill coo_j */
7380       if (!cmapt[cp]) { /* type-0, already global */
7381         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
7382       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7383         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7384       } else { /* type-2, local to global for sparse columns */
7385         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7386       }
7387       ncoo_d += mm->nz;
7388     } else if (rmapt[cp] == 2) { /* sparse rows */
7389       for (i = 0; i < mr; i++) {
7390         const PetscInt *jj = mm->j + ii[i];
7391         const PetscInt gr  = rmap[i];
7392         const PetscInt nz  = ii[i+1] - ii[i];
7393         if (gr >= rs && gr < re) { /* local rows */
7394           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7395           if (!cmapt[cp]) { /* type-0, already global */
7396             for (j = 0; j < nz; j++) *coj++ = jj[j];
7397           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7398             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7399           } else { /* type-2, local to global for sparse columns */
7400             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7401           }
7402           ncoo_d += nz;
7403         }
7404       }
7405     }
7406   }
7407   if (glob) {
7408     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
7409   }
7410   ierr = ISDestroy(&glob);CHKERRQ(ierr);
7411   if (P_oth_l2g) {
7412     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
7413   }
7414   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
7415   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7416   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
7417 
7418   /* preallocate with COO data */
7419   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
7420   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
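  /* A sketch of the matching numeric phase (MatProductNumeric_MPIAIJBACKEND, set above as C->ops->productnumeric):
     it presumably gathers the values of the intermediate products into mmdata->coo_v (scattering off-process
     entries through mmdata->sf) and then inserts them with

       ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  */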
7421   PetscFunctionReturn(0);
7422 }
7423 
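/*
   A minimal usage sketch (illustrative only; A and P stand for assembled MPIAIJ-based matrices such as
   MATMPIAIJCUSPARSE or MATMPIAIJKOKKOS). The symbolic routine above is reached through the generic
   MatProduct API, e.g.

     Mat C;
     ierr = MatProductCreate(A,P,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);

   or, equivalently for PtAP, through the convenience wrapper MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C).
*/
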
7424 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7425 {
7426   Mat_Product    *product = mat->product;
7427   PetscErrorCode ierr;
7428 #if defined(PETSC_HAVE_DEVICE)
7429   PetscBool      match = PETSC_FALSE;
7430   PetscBool      usecpu = PETSC_FALSE;
7431 #else
7432   PetscBool      match = PETSC_TRUE;
7433 #endif
7434 
7435   PetscFunctionBegin;
7436   MatCheckProduct(mat,1);
7437 #if defined(PETSC_HAVE_DEVICE)
7438   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7439     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7440   }
7441   if (match) { /* we can always fall back to the CPU if requested */
7442     switch (product->type) {
7443     case MATPRODUCT_AB:
7444       if (product->api_user) {
7445         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7446         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7447         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7448       } else {
7449         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7450         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7451         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7452       }
7453       break;
7454     case MATPRODUCT_AtB:
7455       if (product->api_user) {
7456         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7457         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7458         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7459       } else {
7460         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7461         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7462         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7463       }
7464       break;
7465     case MATPRODUCT_PtAP:
7466       if (product->api_user) {
7467         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7468         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7469         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7470       } else {
7471         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7472         ierr = PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7473         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7474       }
7475       break;
7476     default:
7477       break;
7478     }
7479     match = (PetscBool)!usecpu;
7480   }
7481 #endif
7482   if (match) {
7483     switch (product->type) {
7484     case MATPRODUCT_AB:
7485     case MATPRODUCT_AtB:
7486     case MATPRODUCT_PtAP:
7487       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7488       break;
7489     default:
7490       break;
7491     }
7492   }
7493   /* fall back to MPIAIJ ops */
7494   if (!mat->ops->productsymbolic) {
7495     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7496   }
7497   PetscFunctionReturn(0);
7498 }
7499 
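/*
   The CPU-fallback flags parsed above correspond to the options database keys
   -matmatmult_backend_cpu, -mattransposematmult_backend_cpu, -matptap_backend_cpu, and
   -mat_product_algorithm_backend_cpu. As an illustrative sketch, such a flag can also be set
   programmatically before the product is configured:

     ierr = PetscOptionsSetValue(NULL,"-matptap_backend_cpu","1");CHKERRQ(ierr);
*/
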
7500 /*
7501     Special version for direct calls from Fortran
7502 */
7503 #include <petsc/private/fortranimpl.h>
7504 
7505 /* Change these macros so they can be used in a void function */
7506 /* Identical to CHKERRV, except it assigns to *_ierr */
7507 #undef CHKERRQ
7508 #define CHKERRQ(ierr) do {                                                                     \
7509     PetscErrorCode ierr_msv_mpiaij = (ierr);                                                   \
7510     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7511       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7512       return;                                                                                  \
7513     }                                                                                          \
7514   } while (0)
7515 
7516 #undef SETERRQ
7517 #define SETERRQ(comm,ierr,...) do {                                                            \
7518     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7519     return;                                                                                    \
7520   } while (0)
7521 
7522 #if defined(PETSC_HAVE_FORTRAN_CAPS)
7523 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7524 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7525 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7526 #else
7527 #endif
7528 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7529 {
7530   Mat            mat  = *mmat;
7531   PetscInt       m    = *mm, n = *mn;
7532   InsertMode     addv = *maddv;
7533   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
7534   PetscScalar    value;
7535   PetscErrorCode ierr;
7536 
7537   MatCheckPreallocated(mat,1);
7538   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7539   else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
7540   {
7541     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7542     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7543     PetscBool roworiented = aij->roworiented;
7544 
7545     /* Some variables required in the macros */
7546     Mat        A                    = aij->A;
7547     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7548     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7549     MatScalar  *aa;
7550     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7551     Mat        B                    = aij->B;
7552     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7553     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7554     MatScalar  *ba;
7555     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7556      * cannot use "#if defined" inside a macro. */
7557     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
7558 
7559     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7560     PetscInt  nonew = a->nonew;
7561     MatScalar *ap1,*ap2;
7562 
7563     PetscFunctionBegin;
7564     ierr = MatSeqAIJGetArray(A,&aa);CHKERRQ(ierr);
7565     ierr = MatSeqAIJGetArray(B,&ba);CHKERRQ(ierr);
7566     for (i=0; i<m; i++) {
7567       if (im[i] < 0) continue;
7568       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
7569       if (im[i] >= rstart && im[i] < rend) {
7570         row      = im[i] - rstart;
7571         lastcol1 = -1;
7572         rp1      = aj + ai[row];
7573         ap1      = aa + ai[row];
7574         rmax1    = aimax[row];
7575         nrow1    = ailen[row];
7576         low1     = 0;
7577         high1    = nrow1;
7578         lastcol2 = -1;
7579         rp2      = bj + bi[row];
7580         ap2      = ba + bi[row];
7581         rmax2    = bimax[row];
7582         nrow2    = bilen[row];
7583         low2     = 0;
7584         high2    = nrow2;
7585 
7586         for (j=0; j<n; j++) {
7587           if (roworiented) value = v[i*n+j];
7588           else value = v[i+j*m];
7589           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7590           if (in[j] >= cstart && in[j] < cend) {
7591             col = in[j] - cstart;
7592             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7593           } else if (in[j] < 0) continue;
7594           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7595             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7596             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
7597           } else {
7598             if (mat->was_assembled) {
7599               if (!aij->colmap) {
7600                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
7601               }
7602 #if defined(PETSC_USE_CTABLE)
7603               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
7604               col--;
7605 #else
7606               col = aij->colmap[in[j]] - 1;
7607 #endif
7608               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7609                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
7610                 col  =  in[j];
7611                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7612                 B        = aij->B;
7613                 b        = (Mat_SeqAIJ*)B->data;
7614                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7615                 rp2      = bj + bi[row];
7616                 ap2      = ba + bi[row];
7617                 rmax2    = bimax[row];
7618                 nrow2    = bilen[row];
7619                 low2     = 0;
7620                 high2    = nrow2;
7621                 bm       = aij->B->rmap->n;
7622                 ba       = b->a;
7623                 inserted = PETSC_FALSE;
7624               }
7625             } else col = in[j];
7626             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7627           }
7628         }
7629       } else if (!aij->donotstash) {
7630         if (roworiented) {
7631           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7632         } else {
7633           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
7634         }
7635       }
7636     }
7637     ierr = MatSeqAIJRestoreArray(A,&aa);CHKERRQ(ierr);
7638     ierr = MatSeqAIJRestoreArray(B,&ba);CHKERRQ(ierr);
7639   }
7640   PetscFunctionReturnVoid();
7641 }
7642 /* Undefine these macros here since they were redefined above, replacing their original definitions. No
7643  * other PETSc functions should be defined past this point, as it is impossible to recover the
7644  * original definitions. */
7645 #undef CHKERRQ
7646 #undef SETERRQ
7647