xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a4ea9b214453c336800a18fd3eb7c63f9f9a33e8)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
/*
   MatFindNonzeroRows_MPIAIJ - creates an IS of the locally owned rows (in global
   numbering) that contain at least one nonzero stored value, checking both the
   diagonal (A) and off-diagonal (B) blocks.

   Output: *keptrows is left NULL when no process has a zero row (decided
   collectively via the Allreduce below), so callers can cheaply detect the
   "nothing to drop" case.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* First pass: count local rows whose stored values are all zero (or that are
     structurally empty); any nonzero value jumps straight to ok1 */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* row is stored but every value is an exact zero */
ok1:;
  }
  /* n0rows = total number of zero rows across the whole matrix */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows NULL and return */
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* Second pass: record the global indices of the rows to keep;
     m - cnt rows survive the filtering */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
/*
   MatGetColumnReductions_MPIAIJ - computes a per-column reduction (norm, sum of
   real/imaginary parts, or mean) over all rows of the parallel matrix.

   Each process accumulates contributions from its diagonal (A) and off-diagonal
   (B) blocks into a length-n work array indexed by global column (A-block columns
   are shifted by cmap->rstart; B-block columns are translated through garray),
   then the work arrays are combined with an Allreduce (MAX for the infinity
   norm, SUM otherwise).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* get/restore the value arrays before touching a_aij->a / b_aij->a directly;
     presumably this triggers a device-to-host sync — same pattern as in
     MatSetValues_MPIAIJ */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; note |a*a| == |a|^2 also for complex scalars */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-process partial results */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  /* post-process: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
247 {
248   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
249   IS              sis,gis;
250   PetscErrorCode  ierr;
251   const PetscInt  *isis,*igis;
252   PetscInt        n,*iis,nsis,ngis,rstart,i;
253 
254   PetscFunctionBegin;
255   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
256   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
257   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
258   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
259   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
260   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
261 
262   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
263   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
264   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
265   n    = ngis + nsis;
266   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
267   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
268   for (i=0; i<n; i++) iis[i] += rstart;
269   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
270 
271   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
272   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
273   ierr = ISDestroy(&sis);CHKERRQ(ierr);
274   ierr = ISDestroy(&gis);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 /*
279   Local utility routine that creates a mapping from the global column
280 number to the local number in the off-diagonal part of the local
281 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
282 a slightly higher hash table cost; without it, it is not scalable (each process
283 stores an order-N integer array) but access is fast.
284 */
/*
   MatCreateColmap_MPIAIJ_Private - builds aij->colmap, mapping a global column
   number to (local off-diagonal column index)+1, so that a lookup result of 0
   means "column not present in the off-diagonal block".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;  /* number of off-diagonal columns */

  PetscFunctionBegin;
  /* garray (global indices of the off-diagonal columns) is required as input */
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table holding only the n off-diagonal columns */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    /* both key and value are shifted by one (0 is not usable in the table) */
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* non-scalable variant: dense zero-initialized array of length cmap->N+1,
     faster to access but O(N) memory per process */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
305 
/*
   MatSetValues_SeqAIJ_A_Private - macro that inserts/adds one (row,col,value)
   into the diagonal block A.  It searches the sorted column indices of the row
   (binary search until the interval is <= 5 wide, then linear scan); on a hit it
   adds or overwrites per addv, otherwise it grows the row (honoring the nonew
   option) and shifts the later entries up.  orow/ocol are the original global
   indices, used only for error messages.  The macro deliberately relies on many
   locals of the caller (rp1, ap1, low1, high1, nrow1, lastcol1, aimax, ailen,
   nonew, inserted, ...); see MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
343 
/*
   MatSetValues_SeqAIJ_B_Private - counterpart of MatSetValues_SeqAIJ_A_Private
   for the off-diagonal block B, using the caller's *2-suffixed search state
   (rp2, ap2, low2, high2, nrow2, lastcol2, ...).  Unlike the A variant, a zero
   value is skipped whenever ignorezeroentries is set (there is no diagonal to
   preserve in the off-diagonal block).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
380 
/*
   MatSetValuesRow_MPIAIJ - overwrites every stored value of a locally owned row,
   where v[] holds the new values ordered by global column number.

   The incoming values are split into three contiguous runs: off-diagonal entries
   left of the diagonal block, the diagonal-block entries, and off-diagonal
   entries right of the diagonal block.  The row ownership start is used as the
   column split point, hence the restriction below.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row number */
  /* l = number of off-diagonal entries with global column < diag; garray
     translates local off-diagonal columns to global numbering */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* host copy is now newer than any device copy, if anything was written */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
409 
/*
   MatSetValues_MPIAIJ - inserts or adds an m-by-n logically dense block of values
   (global indices im[]/in[]) into the parallel matrix.

   Locally owned entries are routed either to the diagonal block (columns in
   [cstart,cend)) via MatSetValues_SeqAIJ_A_Private or to the off-diagonal block
   via MatSetValues_SeqAIJ_B_Private; rows owned by other processes are stashed
   for communication during assembly.  Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  /* scratch state consumed by the two MatSetValues_SeqAIJ_*_Private macros */
  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* when values currently live on the device, a get/restore pair brings the
     host arrays up to date before we write to them directly below */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v may be NULL (structure-only insertion); pick row- or column-major layout */
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        /* skip zero off-diagonal additions when requested */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column owned locally: goes into the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* off-diagonal column */
          if (mat->was_assembled) {
            /* after assembly off-diagonal columns are stored compacted, so the
               global column must be translated through colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* unseen column and new nonzeros allowed: disassemble so B goes
                 back to global column numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* row owned by another process: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
537 
538 /*
539     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
540     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
541     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
542 */
543 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
544 {
545   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
546   Mat            A           = aij->A; /* diagonal part of the matrix */
547   Mat            B           = aij->B; /* offdiagonal part of the matrix */
548   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
549   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
550   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
551   PetscInt       *ailen      = a->ilen,*aj = a->j;
552   PetscInt       *bilen      = b->ilen,*bj = b->j;
553   PetscInt       am          = aij->A->rmap->n,j;
554   PetscInt       diag_so_far = 0,dnz;
555   PetscInt       offd_so_far = 0,onz;
556 
557   PetscFunctionBegin;
558   /* Iterate over all rows of the matrix */
559   for (j=0; j<am; j++) {
560     dnz = onz = 0;
561     /*  Iterate over all non-zero columns of the current row */
562     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
563       /* If column is in the diagonal */
564       if (mat_j[col] >= cstart && mat_j[col] < cend) {
565         aj[diag_so_far++] = mat_j[col] - cstart;
566         dnz++;
567       } else { /* off-diagonal entries */
568         bj[offd_so_far++] = mat_j[col];
569         onz++;
570       }
571     }
572     ailen[j] = dnz;
573     bilen[j] = onz;
574   }
575   PetscFunctionReturn(0);
576 }
577 
578 /*
579     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
580     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
581     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
582     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
583     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
584 */
585 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
586 {
587   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
588   Mat            A      = aij->A; /* diagonal part of the matrix */
589   Mat            B      = aij->B; /* offdiagonal part of the matrix */
590   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
591   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
592   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
594   PetscInt       *ailen = a->ilen,*aj = a->j;
595   PetscInt       *bilen = b->ilen,*bj = b->j;
596   PetscInt       am     = aij->A->rmap->n,j;
597   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
598   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
599   PetscScalar    *aa = a->a,*ba = b->a;
600 
601   PetscFunctionBegin;
602   /* Iterate over all rows of the matrix */
603   for (j=0; j<am; j++) {
604     dnz_row = onz_row = 0;
605     rowstart_offd = full_offd_i[j];
606     rowstart_diag = full_diag_i[j];
607     /*  Iterate over all non-zero columns of the current row */
608     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
609       /* If column is in the diagonal */
610       if (mat_j[col] >= cstart && mat_j[col] < cend) {
611         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
612         aa[rowstart_diag+dnz_row] = mat_a[col];
613         dnz_row++;
614       } else { /* off-diagonal entries */
615         bj[rowstart_offd+onz_row] = mat_j[col];
616         ba[rowstart_offd+onz_row] = mat_a[col];
617         onz_row++;
618       }
619     }
620     ailen[j] = dnz_row;
621     bilen[j] = onz_row;
622   }
623   PetscFunctionReturn(0);
624 }
625 
/*
   MatGetValues_MPIAIJ - retrieves an m-by-n block of values (global indices
   idxm[]/idxn[]) into v, row-major.  Only locally owned rows are supported;
   negative indices are skipped (the corresponding v slot is left untouched).
   Off-diagonal columns are translated through colmap/garray; columns absent
   from the off-diagonal block read back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;  /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* locally owned column: read from the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: map global column -> compact B column */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;  /* colmap stores index+1 so 0 can mean "absent" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale/mismatched colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
665 
666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
677   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly of a parallel AIJ matrix: drains the
  stash of off-process entries set during MatSetValues(), assembles the diagonal (A)
  and off-diagonal (B) sequential blocks, handles collective disassembly detection,
  and (on first final assembly) builds the ghost-value scatter via MatSetUpMultiply_MPIAIJ().
  Collective on the matrix's communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* Receive the stashed off-process entries communicated by MatAssemblyBegin and
     insert them locally; a run of consecutive stash entries for the same row is
     inserted with one MatSetValues_MPIAIJ() call */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;  /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled booleans: the result is true only if EVERY rank
       was previously assembled, i.e. nobody disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: compress B's column space and create the Mvctx scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* scratch buffers used by MatGetRow_MPIAIJ are invalidated by reassembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    /* sum across ranks so any local pattern change bumps the global state */
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
765 
766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
767 {
768   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
773   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
/*
  MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows listed in rows[], optionally
  placing diag on the diagonal and adjusting the right-hand side b so that x stays a
  solution in the zeroed rows. Collective on the matrix's communicator.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    /* in each zeroed row the equation becomes diag*x_i = b_i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* record nonzero states so we can detect whether the pattern changed below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: each diagonal entry lies in the diagonal block mat->A */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* save the blocks' 'nonew' flags; they may be cleared temporarily so that the
       diagonal insertion below can allocate new nonzero locations */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    /* insert diag explicitly; rows whose global index exceeds the column range have
       no diagonal entry and are skipped */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the saved 'nonew' flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows in both blocks */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
851 
/*
  MatZeroRowsColumns_MPIAIJ - Zeros both the rows AND the columns listed (globally) in
  rows[], optionally placing diag on the diagonal and correcting b for the eliminated
  columns. Uses a PetscSF to map the global row list to local owners, then zeros the
  diagonal block via MatZeroRowsColumns() and fixes the off-diagonal block by hand with
  a scattered 0/1 column mask. Collective on the matrix's communicator.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;   /* NOTE: reused below as a per-row nonzero count */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "not requested" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the owned columns and scatter it to the ghost layout so
     each rank can see which of ITS ghost columns were eliminated */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x local so we can correct b for eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row -> actual local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column eliminated: move its contribution to the RHS, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
969 
970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscInt       nt;
975   VecScatter     Mvctx = a->Mvctx;
976 
977   PetscFunctionBegin;
978   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
979   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
980   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
982   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001   VecScatter     Mvctx = a->Mvctx;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   /* do local part */
1020   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1021   /* add partial results together */
1022   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to within tol.
  Cheap collective test on the diagonal blocks first; only if that passes does it
  extract the off-diagonal parts with MatCreateSubMatrices() for the expensive check.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  /* all ranks must agree before proceeding */
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);  /* uniprocess: no off-diagonal part */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  /* 'Notme' = all global indices outside this rank's owned row range [first,last) */
  /* NOTE(review): notme is sized N-last+first but the loops below write up to
     M-last+first entries; for rectangular matrices with M > N this looks like an
     overrun — verify against upstream (the transpose test may effectively assume
     square M == N here). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A(Me,Notme) should equal B(Notme,Me)^T */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1068 
1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1070 {
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   /* do nondiagonal part */
1085   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1086   /* do local part */
1087   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1088   /* add partial results together */
1089   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*
1095   This only works correctly for square matrices where the subblock A->A is the
1096    diagonal block
1097 */
1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1099 {
1100   PetscErrorCode ierr;
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1105   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1106   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1117   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1147   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1148 
1149   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1159 #if defined(PETSC_HAVE_CUDA)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1161 #endif
1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1164 #endif
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1166 #if defined(PETSC_HAVE_ELEMENTAL)
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1168 #endif
1169 #if defined(PETSC_HAVE_SCALAPACK)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1182 #if defined(PETSC_HAVE_MKL_SPARSE)
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1184 #endif
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
/*
  MatView_MPIAIJ_Binary - Writes the matrix to a binary viewer in the standard PETSc
  binary format: header (classid, M, N, total nz), per-row lengths, column indices,
  then values. For each local row the off-diagonal (B) and diagonal (A) entries are
  merged so the emitted column indices appear in ascending global order:
  B-columns left of the diagonal block, then the A-columns, then the remaining B-columns.
  Collective on the matrix's communicator.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;  /* local B column -> global column map */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;  /* first global column of this rank's diagonal block */
  nz = A->nz + B->nz;      /* local nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count; only rank 0 writes the header */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths  */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    /* B-columns whose global index precedes the diagonal block */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block columns, shifted to global numbering */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining B-columns to the right of the diagonal block */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values (same merge order as the column indices above) */
  ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1266 
1267 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for MPIAIJ matrices. Handles
  the special ASCII formats (load balance, info, detailed info) and binary output
  directly; all remaining cases gather the entire matrix onto rank 0 via
  MatCreateSubMatrix() and view it there sequentially. Collective.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* NOTE(review): this second PetscViewerGetFormat call repeats the one above;
       harmless but redundant */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: view the sequential diagonal block directly */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row/column; other ranks request none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (rank == 0) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1396 
1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1398 {
1399   PetscErrorCode ierr;
1400   PetscBool      iascii,isdraw,issocket,isbinary;
1401 
1402   PetscFunctionBegin;
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1404   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1407   if (iascii || isdraw || isbinary || issocket) {
1408     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1414 {
1415   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1416   PetscErrorCode ierr;
1417   Vec            bb1 = NULL;
1418   PetscBool      hasop;
1419 
1420   PetscFunctionBegin;
1421   if (flag == SOR_APPLY_UPPER) {
1422     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1423     PetscFunctionReturn(0);
1424   }
1425 
1426   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1427     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1428   }
1429 
1430   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1433       its--;
1434     }
1435 
1436     while (its--) {
1437       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1438       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1442       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1443 
1444       /* local sweep */
1445       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1446     }
1447   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1450       its--;
1451     }
1452     while (its--) {
1453       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1454       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1458       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1459 
1460       /* local sweep */
1461       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1462     }
1463   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1464     if (flag & SOR_ZERO_INITIAL_GUESS) {
1465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1466       its--;
1467     }
1468     while (its--) {
1469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471 
1472       /* update rhs: bb1 = bb - B*x */
1473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1475 
1476       /* local sweep */
1477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1478     }
1479   } else if (flag & SOR_EISENSTAT) {
1480     Vec xx1;
1481 
1482     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1483     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1484 
1485     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487     if (!mat->diag) {
1488       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1489       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1490     }
1491     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1492     if (hasop) {
1493       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1494     } else {
1495       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1496     }
1497     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1498 
1499     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1500 
1501     /* local sweep */
1502     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1503     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1504     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1505   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1506 
1507   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1508 
1509   matin->factorerrortype = mat->A->factorerrortype;
1510   PetscFunctionReturn(0);
1511 }
1512 
/*
   MatPermute_MPIAIJ - creates *B = A(rowp,colp), a row and column permutation of A.

   rowp/colp give, for each locally owned row/column, its new global position.
   The permutations are inverted using star forests (PetscSF reduce) to find the
   destination of each local row/column, preallocation counts are computed from
   the permuted sparsity, and the entries are inserted with MatSetValues() so
   assembly routes them to their new owning processes.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;   /* diagonal/off-diagonal blocks of A, and the result */
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, per local row, how many permuted entries land in the diagonal
     (dnnz) vs off-diagonal (onnz) block of the destination row's owner */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that will own the permuted rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  /* insert the permuted entries; assembly handles the off-process routing */
  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned in this function, so this destroy
     is currently dead code -- confirm before removing */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscLogDouble isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
/*
   MatSetOption_MPIAIJ - sets an option on an MPIAIJ matrix.

   Options affecting the nonzero structure are forwarded to both the diagonal
   (a->A) and off-diagonal (a->B) sequential blocks; purely parallel options
   are recorded in the Mat_MPIAIJ structure or on the Mat itself.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* these are forwarded verbatim to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* remembered locally and also forwarded to both blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* flag consumed by the MatSetValues/stash path (not visible in this chunk) */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1729 
/*
   MatGetRow_MPIAIJ - extracts one locally owned row of the parallel matrix.

   The row is assembled by merging the corresponding row of the diagonal block
   (mat->A, local columns shifted by cstart) with the row of the off-diagonal
   block (mat->B, local columns mapped to global indices through mat->garray),
   producing globally increasing column indices (assuming both block rows and
   garray are sorted, as the comment below states).

   The idx/v outputs point into buffers (rowindices/rowvalues) owned by the
   matrix, sized once for the longest row; MatRestoreRow() must be called
   before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      /* combined length of row i across the two blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart; /* local row index */

  /* request from the blocks only the pieces (values/columns) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* local-to-global map for off-diagonal block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column is < cstart,
         i.e. those that come before the diagonal block in the merged row */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];   /* diagonal-block values in the middle */
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];   /* remaining B values after the diagonal block */
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found by the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i]; /* shift A columns to global numbering */
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  /* hand the block rows back to the sequential matrices */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1807 
1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1809 {
1810   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1811 
1812   PetscFunctionBegin;
1813   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1814   aij->getrowactive = PETSC_FALSE;
1815   PetscFunctionReturn(0);
1816 }
1817 
/*
   MatNorm_MPIAIJ - computes the Frobenius, 1- (max column sum), or infinity-
   (max row sum) norm of a parallel AIJ matrix.  The 2-norm is not supported.
   Contributions from the diagonal (A) and off-diagonal (B) blocks are
   combined locally and then reduced over the matrix communicator.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| into an array of global column sums; note the
         temporary storage scales with the GLOBAL column count N */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal block: shift local column to global with cstart */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block: map local column to global through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* each local row is split between the two blocks; sum |.| over both */
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1884 
/*
   MatTranspose_MPIAIJ - forms the transpose of a parallel AIJ matrix.

   The diagonal block is transposed locally (fast path, no MatSetValues); the
   off-diagonal block is transposed by translating its columns to global
   indices and inserting with MatSetValues(), letting assembly route entries
   to their new owners.  With MAT_INITIAL_MATRIX (or when *matout == A) a new
   exactly preallocated matrix is created; with MAT_REUSE_MATRIX the existing
   *matout is refilled; otherwise (in-place) the result replaces A via
   MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;   /* CSR structure of the diagonal block */
  bi = Bloc->i; bj = Bloc->j;   /* CSR structure of the off-diagonal block */
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* count nonzeros per column of A to preallocate the transpose exactly */
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* create the transpose with swapped sizes and block sizes */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reusing B: a new nonzero location would indicate a pattern mismatch */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate the off-diagonal block's local columns to global indices */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    /* entries of global row `row` of A become column `row` of B:
       insert ncol rows (the global columns of A) x 1 column (`row`) */
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1975 
/*
   MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr).

   Either vector may be NULL (no scaling on that side).  The scatter of rr to
   the ghost entries is started first so the communication overlaps the
   left-scaling and the diagonal-block scaling.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;   /* diagonal and off-diagonal blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;                   /* s2 x s3 = local matrix size */

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block with the local part of ll only */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2006 
2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   PetscErrorCode ierr;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2018 {
2019   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2020   Mat            a,b,c,d;
2021   PetscBool      flg;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A; b = matA->B;
2026   c = matB->A; d = matB->B;
2027 
2028   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2029   if (flg) {
2030     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2031   }
2032   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2037 {
2038   PetscErrorCode ierr;
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* because of the column compression in the off-processor part of the matrix a->B,
2046        the number of columns in a->B and b->B may be different, hence we cannot call
2047        the MatCopy() directly on the two parts. If need be, we can provide a more
2048        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2049        then copying the submatrices */
2050     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2051   } else {
2052     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2053     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2054   }
2055   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
/*
   MatSetUp_MPIAIJ - default setup: performs preallocation with default
   (PETSC_DEFAULT) per-row nonzero estimates for both blocks.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscErrorCode ierr;
2097   PetscInt       m = Y->rmap->N;
2098   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2099   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
/* Y = Y + a*X; strategy depends on how the nonzero patterns of X and Y relate */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: add the diagonal and off-diagonal blocks independently */
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unrelated patterns: build a fresh matrix B preallocated for the union pattern,
       compute B = Y + a*X into it, then swap B's guts into Y's header so the
       caller's handle to Y stays valid */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    /* per-row counts for the union pattern; off-diagonal columns are compared in
       global numbering through the garrays */
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2137 
2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2139 
/* Complex-conjugate every stored entry; compiled to a no-op for real scalars */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* conjugate both local blocks */
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2154 
2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2156 {
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2162   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2173   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2178 {
2179   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode    ierr;
2181   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2182   PetscScalar       *va,*vv;
2183   Vec               vB,vA;
2184   const PetscScalar *vb;
2185 
2186   PetscFunctionBegin;
2187   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2188   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2189 
2190   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2191   if (idx) {
2192     for (i=0; i<m; i++) {
2193       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2194     }
2195   }
2196 
2197   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2198   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2199   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2200 
2201   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2202   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2203   for (i=0; i<m; i++) {
2204     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2205       vv[i] = vb[i];
2206       if (idx) idx[i] = a->garray[idxb[i]];
2207     } else {
2208       vv[i] = va[i];
2209       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2210         idx[i] = a->garray[idxb[i]];
2211     }
2212   }
2213   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2214   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2215   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2216   ierr = PetscFree(idxb);CHKERRQ(ierr);
2217   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2218   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
/*
  MatGetRowMinAbs_MPIAIJ - For each local row, v receives the entry of smallest
  absolute value; idx[] (if given) receives its global column index.  Implicit
  zeros count: a row whose off-diagonal part does not cover all non-owned
  columns has |min| = 0.0, and idx[] then reports a column with no stored entry.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: result is 0.0 with no column index */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, start from the first entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: there is an implicit 0.0, which already attains the minimum |value| */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the position j within the row against the
         column offsets cstart/n; confirm against the SeqAIJ counterpart before relying on idx[] here */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has smaller |value| */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block candidates; ties go to the smaller column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2330 
/*
  MatGetRowMin_MPIAIJ - For each local row, v receives the (signed) minimum
  entry; idx[] (if given) receives its global column index.  Implicit zeros
  count: a row whose off-diagonal part does not cover all non-owned columns
  has minimum <= 0.0.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of min with no column index */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, start from the first entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: there is an implicit 0.0, so the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the position j within the row against the
         column offsets cstart/n; confirm against the SeqAIJ counterpart before relying on idx[] here */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has smaller real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block candidates; ties go to the smaller column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2439 
/*
  MatGetRowMax_MPIAIJ - For each local row, v receives the (signed) maximum
  entry; idx[] (if given) receives its global column index.  Implicit zeros
  count: a row whose off-diagonal part does not cover all non-owned columns
  has maximum >= 0.0.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block: delegate directly, writing into v's array */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: report the identity of max with no column index */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, start from the first entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the position j within the row against the
         column offsets cstart/n; confirm against the SeqAIJ counterpart before relying on idx[] here */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries; keep whichever has larger real part */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block candidates; ties go to the smaller column */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2548 
2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2550 {
2551   PetscErrorCode ierr;
2552   Mat            *dummy;
2553 
2554   PetscFunctionBegin;
2555   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2556   *newmat = *dummy;
2557   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(0);
2570 }
2571 
/* Fill the matrix with random values from rctx; requires assembly or preallocation first */
static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  if (x->assembled) {
    ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  } else {
    /* not yet assembled: B's columns are still in global numbering, so skip the
       range [rstart,rend) that belongs to the diagonal block */
    ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2589 
2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /*@
2599    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2600 
2601    Collective on Mat
2602 
2603    Input Parameters:
2604 +    A - the matrix
2605 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2606 
   Level: advanced
2608 
2609 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  /* dispatch to the implementation only when the matrix type registers it; otherwise a no-op */
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2618 
2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2620 {
2621   PetscErrorCode       ierr;
2622   PetscBool            sc = PETSC_FALSE,flg;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2626   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2627   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2628   if (flg) {
2629     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2630   }
2631   ierr = PetscOptionsTail();CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
/* Y = Y + a*I; ensure the diagonal block can hold the diagonal before shifting */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: one diagonal entry per row is all the shift needs */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but the diagonal block is empty: re-preallocate it for the
       diagonal while preserving the user's nonew (new-nonzero handling) flag,
       which MatSeqAIJSetPreallocation resets */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2652 
2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2654 {
2655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2660   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2661   if (d) {
2662     PetscInt rstart;
2663     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2664     *d += rstart;
2665 
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 /* -------------------------------------------------------------------*/
/* Function-pointer table installed on every MATMPIAIJ matrix; the numeric
   comments are the slot indices in struct _MatOps.  NULL slots fall back to
   the generic/basic implementation (or are unsupported) for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2830 
2831 /* ----------------------------------------------------------------------------------------*/
2832 
2833 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2834 {
2835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2840   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2851   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - type-specific implementation of MatMPIAIJSetPreallocation().

  Sets up the row/column layouts, discards assembly artifacts from any previous use
  (column map, global column array, local vector, scatter context), recreates the
  sequential off-diagonal block B (and, on first call only, the diagonal block A),
  then forwards the preallocation information to the two sequential blocks.

  Input Parameters:
+ B     - the MPIAIJ matrix being preallocated
. d_nz  - nonzeros per row of the diagonal block (used when d_nnz is NULL)
. d_nnz - per-row nonzero counts for the diagonal block, or NULL
. o_nz  - nonzeros per row of the off-diagonal block (used when o_nnz is NULL)
- o_nnz - per-row nonzero counts for the off-diagonal block, or NULL
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard the column map built during a previous assembly; it will be rebuilt */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on one process the off-diagonal block is empty, so it gets zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block survives repeated preallocation calls; create it only once */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2899 
2900 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2901 {
2902   Mat_MPIAIJ     *b;
2903   PetscErrorCode ierr;
2904 
2905   PetscFunctionBegin;
2906   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2907   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2909   b = (Mat_MPIAIJ*)B->data;
2910 
2911 #if defined(PETSC_USE_CTABLE)
2912   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2913 #else
2914   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2915 #endif
2916   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2917   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2918   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2919 
2920   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2921   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2922   B->preallocated  = PETSC_TRUE;
2923   B->was_assembled = PETSC_FALSE;
2924   B->assembled = PETSC_FALSE;
2925   PetscFunctionReturn(0);
2926 }
2927 
/*
  MatDuplicate_MPIAIJ - duplicates an MPIAIJ matrix, optionally copying its values.

  Creates a new matrix with the same layouts, type and block structure as matin, then
  duplicates the diagonal (A) and off-diagonal (B) sequential blocks, the column map,
  the global column index array, and the communication objects (lvec, Mvctx).

  Input Parameters:
+ matin    - the matrix to duplicate
- cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN
             (forwarded to MatDuplicate() of the sequential blocks)

  Output Parameter:
. newmat   - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* the MatGetRow() workspace is not copied; it is allocated on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy the global-to-local column map of the off-diagonal block, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* copy the global column indices of the off-diagonal block, if present */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2994 
2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2996 {
2997   PetscBool      isbinary, ishdf5;
2998   PetscErrorCode ierr;
2999 
3000   PetscFunctionBegin;
3001   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3002   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3003   /* force binary viewer to load .info file if it has not yet done so */
3004   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3005   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3007   if (isbinary) {
3008     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3009   } else if (ishdf5) {
3010 #if defined(PETSC_HAVE_HDF5)
3011     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3012 #else
3013     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3014 #endif
3015   } else {
3016     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3017   }
3018   PetscFunctionReturn(0);
3019 }
3020 
/*
  MatLoad_MPIAIJ_Binary - loads an MPIAIJ matrix from a PETSc binary viewer.

  Collective on the viewer's communicator.  The binary format is:
  header[4] = {MAT_FILE_CLASSID, M, N, nz}, followed by the per-row nonzero
  counts, the column indices, and the values.  Each process reads its own
  share of rows via PetscViewerBinaryReadAll().

  Input Parameters:
+ mat    - the matrix to fill in (global sizes may be unset, in which case
           they are taken from the file)
- viewer - binary viewer, already set up by the caller
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  /* nz < 0 flags a special on-disk format (e.g. dense) that this loader cannot handle */
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  /* prefix-sum converts the per-row counts into local CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must equal nz from the header */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3068 
3069 /* Not scalable because of ISAllGather() unless getting all columns. */
3070 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3071 {
3072   PetscErrorCode ierr;
3073   IS             iscol_local;
3074   PetscBool      isstride;
3075   PetscMPIInt    lisstride=0,gisstride;
3076 
3077   PetscFunctionBegin;
3078   /* check if we are grabbing all columns*/
3079   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3080 
3081   if (isstride) {
3082     PetscInt  start,len,mstart,mlen;
3083     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3084     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3085     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3086     if (mstart == start && mlen-mstart == len) lisstride = 1;
3087   }
3088 
3089   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3090   if (gisstride) {
3091     PetscInt N;
3092     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3093     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3094     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3095     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3096   } else {
3097     PetscInt cbs;
3098     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3099     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3100     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3101   }
3102 
3103   *isseq = iscol_local;
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 /*
3108  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3109  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3110 
3111  Input Parameters:
3112    mat - matrix
3113    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3114            i.e., mat->rstart <= isrow[i] < mat->rend
3115    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3116            i.e., mat->cstart <= iscol[i] < mat->cend
3117  Output Parameter:
3118    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3119    iscol_o - sequential column index set for retrieving mat->B
3120    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3121  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;       /* full-length column vectors used to mark selected columns */
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;       /* off-diagonal block of mat */
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of selected columns on lower-rank processes */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  /* mark each selected column: x holds its global index, cmap its position in iscol */
  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  /* idx ownership transfers to iscol_d (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  /* entries of lvec > -1 mark columns of B that are selected by iscol */
  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 is returned to the caller, who is responsible for freeing it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3219 
3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3222 {
3223   PetscErrorCode ierr;
3224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3225   Mat            M = NULL;
3226   MPI_Comm       comm;
3227   IS             iscol_d,isrow_d,iscol_o;
3228   Mat            Asub = NULL,Bsub = NULL;
3229   PetscInt       n;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3233 
3234   if (call == MAT_REUSE_MATRIX) {
3235     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3236     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3237     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3238 
3239     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3240     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3241 
3242     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3243     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3244 
3245     /* Update diagonal and off-diagonal portions of submat */
3246     asub = (Mat_MPIAIJ*)(*submat)->data;
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3248     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3249     if (n) {
3250       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3251     }
3252     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 
3255   } else { /* call == MAT_INITIAL_MATRIX) */
3256     const PetscInt *garray;
3257     PetscInt        BsubN;
3258 
3259     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3260     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3261 
3262     /* Create local submatrices Asub and Bsub */
3263     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3264     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3265 
3266     /* Create submatrix M */
3267     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3268 
3269     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3270     asub = (Mat_MPIAIJ*)M->data;
3271 
3272     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3273     n = asub->B->cmap->N;
3274     if (BsubN > n) {
3275       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3276       const PetscInt *idx;
3277       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3278       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3279 
3280       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3281       j = 0;
3282       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3283       for (i=0; i<n; i++) {
3284         if (j >= BsubN) break;
3285         while (subgarray[i] > garray[j]) j++;
3286 
3287         if (subgarray[i] == garray[j]) {
3288           idx_new[i] = idx[j++];
3289         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3290       }
3291       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3292 
3293       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3294       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3295 
3296     } else if (BsubN < n) {
3297       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3298     }
3299 
3300     ierr = PetscFree(garray);CHKERRQ(ierr);
3301     *submat = M;
3302 
3303     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3304     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3305     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3306 
3307     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3308     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3309 
3310     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3311     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3317 {
3318   PetscErrorCode ierr;
3319   IS             iscol_local=NULL,isrow_d;
3320   PetscInt       csize;
3321   PetscInt       n,i,j,start,end;
3322   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3323   MPI_Comm       comm;
3324 
3325   PetscFunctionBegin;
3326   /* If isrow has same processor distribution as mat,
3327      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3328   if (call == MAT_REUSE_MATRIX) {
3329     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3330     if (isrow_d) {
3331       sameRowDist  = PETSC_TRUE;
3332       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3333     } else {
3334       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3335       if (iscol_local) {
3336         sameRowDist  = PETSC_TRUE;
3337         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3338       }
3339     }
3340   } else {
3341     /* Check if isrow has same processor distribution as mat */
3342     sameDist[0] = PETSC_FALSE;
3343     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3344     if (!n) {
3345       sameDist[0] = PETSC_TRUE;
3346     } else {
3347       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3348       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3349       if (i >= start && j < end) {
3350         sameDist[0] = PETSC_TRUE;
3351       }
3352     }
3353 
3354     /* Check if iscol has same processor distribution as mat */
3355     sameDist[1] = PETSC_FALSE;
3356     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3357     if (!n) {
3358       sameDist[1] = PETSC_TRUE;
3359     } else {
3360       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3361       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3362       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3363     }
3364 
3365     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3366     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3367     sameRowDist = tsameDist[0];
3368   }
3369 
3370   if (sameRowDist) {
3371     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3372       /* isrow and iscol have same processor distribution as mat */
3373       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3374       PetscFunctionReturn(0);
3375     } else { /* sameRowDist */
3376       /* isrow has same processor distribution as mat */
3377       if (call == MAT_INITIAL_MATRIX) {
3378         PetscBool sorted;
3379         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3380         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3381         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3382         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3383 
3384         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3385         if (sorted) {
3386           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3387           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3388           PetscFunctionReturn(0);
3389         }
3390       } else { /* call == MAT_REUSE_MATRIX */
3391         IS iscol_sub;
3392         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3393         if (iscol_sub) {
3394           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3395           PetscFunctionReturn(0);
3396         }
3397       }
3398     }
3399   }
3400 
3401   /* General case: iscol -> iscol_local which has global size of iscol */
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3404     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3405   } else {
3406     if (!iscol_local) {
3407       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3408     }
3409   }
3410 
3411   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3412   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3413 
3414   if (call == MAT_INITIAL_MATRIX) {
3415     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3416     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3417   }
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 /*@C
3422      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3423          and "off-diagonal" part of the matrix in CSR format.
3424 
3425    Collective
3426 
3427    Input Parameters:
3428 +  comm - MPI communicator
3429 .  A - "diagonal" portion of matrix
3430 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3431 -  garray - global index of B columns
3432 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3436 
3437    Notes:
3438        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3439        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3440 
3441 .seealso: MatCreateMPIAIJWithSplitArrays()
3442 @*/
3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3444 {
3445   PetscErrorCode    ierr;
3446   Mat_MPIAIJ        *maij;
3447   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3448   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3449   const PetscScalar *oa;
3450   Mat               Bnew;
3451   PetscInt          m,n,N;
3452 
3453   PetscFunctionBegin;
3454   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3455   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3456   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3457   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3458   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3459   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3460 
3461   /* Get global columns of mat */
3462   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3463 
3464   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3465   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3466   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3467   maij = (Mat_MPIAIJ*)(*mat)->data;
3468 
3469   (*mat)->preallocated = PETSC_TRUE;
3470 
3471   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3472   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3473 
3474   /* Set A as diagonal portion of *mat */
3475   maij->A = A;
3476 
3477   nz = oi[m];
3478   for (i=0; i<nz; i++) {
3479     col   = oj[i];
3480     oj[i] = garray[col];
3481   }
3482 
3483   /* Set Bnew as off-diagonal portion of *mat */
3484   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3485   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3486   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3487   bnew        = (Mat_SeqAIJ*)Bnew->data;
3488   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3489   maij->B     = Bnew;
3490 
3491   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3492 
3493   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3494   b->free_a       = PETSC_FALSE;
3495   b->free_ij      = PETSC_FALSE;
3496   ierr = MatDestroy(&B);CHKERRQ(ierr);
3497 
3498   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3499   bnew->free_a       = PETSC_TRUE;
3500   bnew->free_ij      = PETSC_TRUE;
3501 
3502   /* condense columns of maij->B */
3503   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3504   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3507   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3512 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - Extracts the parallel submatrix mat[isrow,iscol] when the
   rows of the result keep the same process distribution as the rows of mat (no rows migrate).

   Input:
+    mat         - the MPIAIJ matrix
.    isrow       - rows to extract (this process's portion)
.    iscol       - columns to extract (parallel IS)
.    iscol_local - sequential IS holding ALL requested columns on each process; the scan in
                   step (2) below requires it to be sorted (duplicates are permitted)
-    call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output:
.    newmat - the submatrix.  On MAT_INITIAL_MATRIX the intermediate objects (sequential
              submatrix "SubMatrix", column IS "SubIScol", column map "Subcmap", and
              iscol_local as "ISAllGather") are composed onto *newmat so a later
              MAT_REUSE_MATRIX call can retrieve them instead of rebuilding.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the work objects stashed on *newmat by the earlier MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refill the existing sequential submatrix with the current values of mat */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must hold on every process for the optimized path; hence the logical AND reduction */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      /* All columns requested: reuse iscol_local directly; the column map is the identity */
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge-scan against the sorted garray (global
             column indices of the off-diagonal block); k never rewinds because
             iscol_local is required to be sorted */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      /* iscol_sub: the columns of mat this process actually touches; iscmap maps them
         back to their position (i.e. the column index) in the submatrix */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns evenly; the first (Ncols % size) ranks take one extra */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation: dlens and olens share one buffer */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* cmap translates Msub's local column into the submatrix's global column */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr); /* frees olens too (shared buffer) */

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* map to submatrix global columns */
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr); /* composition holds its own reference */

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3722 
/*
    MatCreateSubMatrix_MPIAIJ_nonscalable - Extracts the parallel submatrix mat[isrow,iscol].

    Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
  then the final result by concatenating the local matrices.  Writing it directly would be
  much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol with all indices (hence "nonscalable": every
  process holds the full column index set).

  csize is the requested number of local columns of the result, or PETSC_DECIDE.
  On MAT_INITIAL_MATRIX the sequential submatrix is composed onto the result as
  "SubMatrix" so a later MAT_REUSE_MATRIX call can refill it without reallocation.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case must hold on every rank; take the logical AND across the communicator */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* Reuse the sequential submatrix stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns evenly; the first (n % size) ranks take one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column range [rstart,rend) */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* dlens and olens share a single allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr); /* frees olens too */
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork point at this row's column indices and values inside Mreuse's CSR arrays */
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); /* composition holds its own reference */
  }
  PetscFunctionReturn(0);
}
3855 
3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart, cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscErrorCode ierr;
3862   PetscBool      nooffprocentries;
3863 
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3866 
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873 
3874   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3875 
3876   if (PetscDefined(USE_DEBUG)) {
3877     for (i=0; i<m; i++) {
3878       nnz = Ii[i+1]- Ii[i];
3879       JJ  = J + Ii[i];
3880       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3881       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3882       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3883     }
3884   }
3885 
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899 
3900   for (i=0; i<m; i++) {
3901     ii   = i + rstart;
3902     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3903   }
3904   nooffprocentries    = B->nooffprocentries;
3905   B->nooffprocentries = PETSC_TRUE;
3906   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3907   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   B->nooffprocentries = nooffprocentries;
3909 
3910   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*@
3915    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3916    (the default parallel PETSc format).
3917 
3918    Collective
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  i - the indices into j for the start of each local row (starts with zero)
3923 .  j - the column indices for each local row (starts with zero)
3924 -  v - optional values in the matrix
3925 
3926    Level: developer
3927 
3928    Notes:
3929        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3934 
3935        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
3937     as shown
3938 
3939 $        1 0 0
3940 $        2 0 3     P0
3941 $       -------
3942 $        4 5 6     P1
3943 $
3944 $     Process0 [P0]: rows_owned=[0,1]
3945 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3946 $        j =  {0,0,2}  [size = 3]
3947 $        v =  {1,2,3}  [size = 3]
3948 $
3949 $     Process1 [P1]: rows_owned=[2]
3950 $        i =  {0,3}    [size = nrow+1  = 1+1]
3951 $        j =  {0,1,2}  [size = 3]
3952 $        v =  {4,5,6}  [size = 3]
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3955           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
/* Public entry point: forwards to the type-specific implementation registered under
   "MatMPIAIJSetPreallocationCSR_C" (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ).
   PetscTryMethod is a no-op when the matrix type does not provide that method. */
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3965 
3966 /*@C
3967    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).  For good matrix assembly performance
3969    the user should preallocate the matrix storage by setting the parameters
3970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3971    performance can be increased by more than a factor of 50.
3972 
3973    Collective
3974 
3975    Input Parameters:
3976 +  B - the matrix
3977 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3978            (same value is used for all local rows)
3979 .  d_nnz - array containing the number of nonzeros in the various rows of the
3980            DIAGONAL portion of the local submatrix (possibly different for each row)
3981            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3982            The size of this array is equal to the number of local rows, i.e 'm'.
3983            For matrices that will be factored, you must leave room for (and set)
3984            the diagonal entry even if it is zero.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    If the *_nnz parameter is given then the *_nz parameter is ignored
3994 
3995    The AIJ format (also called the Yale sparse matrix format or
3996    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3997    storage.  The stored row and column indices begin with zero.
3998    See Users-Manual: ch_mat for details.
3999 
4000    The parallel matrix is partitioned such that the first m0 rows belong to
4001    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4002    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4003 
4004    The DIAGONAL portion of the local submatrix of a processor can be defined
4005    as the submatrix which is obtained by extraction the part corresponding to
4006    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4007    first row that belongs to the processor, r2 is the last row belonging to
4008    the this processor, and c1-c2 is range of indices of the local part of a
4009    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4010    common case of a square matrix, the row and column ranges are the same and
4011    the DIAGONAL part is also square. The remaining portion of the local
4012    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4013 
4014    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4015 
4016    You can call MatGetInfo() to get information on how effective the preallocation was;
4017    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4018    You can also run with the option -info and look for messages with the string
4019    malloc in them to see if additional memory allocation was needed.
4020 
4021    Example usage:
4022 
4023    Consider the following 8x8 matrix with 34 non-zero values, that is
4024    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4025    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026    as follows:
4027 
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040 
4041    This can be represented as a collection of submatrices as:
4042 
4043 .vb
4044       A B C
4045       D E F
4046       G H I
4047 .ve
4048 
4049    Where the submatrices A,B,C are owned by proc0, D,E,F are
4050    owned by proc1, G,H,I are owned by proc2.
4051 
4052    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054    The 'M','N' parameters are 8,8, and have the same values on all procs.
4055 
4056    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4057    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4058    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4059    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4060    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4062 
4063    When d_nz, o_nz parameters are specified, d_nz storage elements are
4064    allocated for every row of the local diagonal submatrix, and o_nz
4065    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4067    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4068    In this case, the values of d_nz,o_nz are:
4069 .vb
4070      proc0 : dnz = 2, o_nz = 2
4071      proc1 : dnz = 3, o_nz = 2
4072      proc2 : dnz = 1, o_nz = 4
4073 .ve
4074    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4075    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4077    34 values.
4078 
4079    When d_nnz, o_nnz parameters are specified, the storage is specified
4080    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4081    In the above case the values for d_nnz,o_nnz are:
4082 .vb
4083      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4084      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4085      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4086 .ve
4087    Here the space allocated is sum of all the above values i.e 34, and
4088    hence pre-allocation is perfect.
4089 
4090    Level: intermediate
4091 
4092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4093           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4094 @*/
/* Public entry point: validates B, then forwards to the type-specific implementation
   registered under "MatMPIAIJSetPreallocation_C".  PetscTryMethod is a no-op when the
   matrix type does not provide that method, so calling this on a non-MPIAIJ type is safe. */
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1); /* B must be a valid Mat */
  PetscValidType(B,1);                       /* ... whose type has already been set */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4105 
4106 /*@
4107      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4108          CSR format for the local rows.
4109 
4110    Collective
4111 
4112    Input Parameters:
4113 +  comm - MPI communicator
4114 .  m - number of local rows (Cannot be PETSC_DECIDE)
4115 .  n - This value should be the same as the local size used in creating the
4116        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4117        calculated if N is given) For square matrices n is almost always m.
4118 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4119 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4120 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4121 .   j - column indices
4122 -   a - matrix values
4123 
4124    Output Parameter:
4125 .   mat - the matrix
4126 
4127    Level: intermediate
4128 
4129    Notes:
4130        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4131      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4132      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4133 
4134        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4135 
4136        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
4138     as shown
4139 
4140        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4141 
4142 $        1 0 0
4143 $        2 0 3     P0
4144 $       -------
4145 $        4 5 6     P1
4146 $
4147 $     Process0 [P0]: rows_owned=[0,1]
4148 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4149 $        j =  {0,0,2}  [size = 3]
4150 $        v =  {1,2,3}  [size = 3]
4151 $
4152 $     Process1 [P1]: rows_owned=[2]
4153 $        i =  {0,3}    [size = nrow+1  = 1+1]
4154 $        j =  {0,1,2}  [size = 3]
4155 $        v =  {4,5,6}  [size = 3]
4156 
4157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4158           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4159 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the CSR input: the row-offset array must start at 0 and the local row
     count must be given explicitly (PETSC_DECIDE is not allowed for m) */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* Copies i/j/a into the matrix, preallocates exactly, and assembles it */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4174 
4175 /*@
4176      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4177          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4178 
4179    Collective
4180 
4181    Input Parameters:
4182 +  mat - the matrix
4183 .  m - number of local rows (Cannot be PETSC_DECIDE)
4184 .  n - This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4186        calculated if N is given) For square matrices n is almost always m.
4187 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4188 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4189 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4190 .  J - column indices
4191 -  v - matrix values
4192 
4193    Level: intermediate
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4197 @*/
4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4199 {
4200   PetscErrorCode ierr;
4201   PetscInt       cstart,nnz,i,j;
4202   PetscInt       *ld;
4203   PetscBool      nooffprocentries;
4204   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4205   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4206   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4207   const PetscInt *Adi = Ad->i;
4208   PetscInt       ldi,Iii,md;
4209 
4210   PetscFunctionBegin;
4211   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4212   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4214   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4215 
4216   cstart = mat->cmap->rstart;
4217   if (!Aij->ld) {
4218     /* count number of entries below block diagonal */
4219     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4220     Aij->ld = ld;
4221     for (i=0; i<m; i++) {
4222       nnz  = Ii[i+1]- Ii[i];
4223       j     = 0;
4224       while  (J[j] < cstart && j < nnz) {j++;}
4225       J    += nnz;
4226       ld[i] = j;
4227     }
4228   } else {
4229     ld = Aij->ld;
4230   }
4231 
4232   for (i=0; i<m; i++) {
4233     nnz  = Ii[i+1]- Ii[i];
4234     Iii  = Ii[i];
4235     ldi  = ld[i];
4236     md   = Adi[i+1]-Adi[i];
4237     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4238     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4239     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4240     ad  += md;
4241     ao  += nnz - md;
4242   }
4243   nooffprocentries      = mat->nooffprocentries;
4244   mat->nooffprocentries = PETSC_TRUE;
4245   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4246   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4247   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4248   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4249   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4250   mat->nooffprocentries = nooffprocentries;
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 /*@C
4255    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4256    (the default parallel PETSc format).  For good matrix assembly performance
4257    the user should preallocate the matrix storage by setting the parameters
4258    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4259    performance can be increased by more than a factor of 50.
4260 
4261    Collective
4262 
4263    Input Parameters:
4264 +  comm - MPI communicator
4265 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4266            This value should be the same as the local size used in creating the
4267            y vector for the matrix-vector product y = Ax.
4268 .  n - This value should be the same as the local size used in creating the
4269        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4270        calculated if N is given) For square matrices n is almost always m.
4271 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4272 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4273 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4274            (same value is used for all local rows)
4275 .  d_nnz - array containing the number of nonzeros in the various rows of the
4276            DIAGONAL portion of the local submatrix (possibly different for each row)
4277            or NULL, if d_nz is used to specify the nonzero structure.
4278            The size of this array is equal to the number of local rows, i.e 'm'.
4279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4280            submatrix (same value is used for all local rows).
4281 -  o_nnz - array containing the number of nonzeros in the various rows of the
4282            OFF-DIAGONAL portion of the local submatrix (possibly different for
4283            each row) or NULL, if o_nz is used to specify the nonzero
4284            structure. The size of this array is equal to the number
4285            of local rows, i.e 'm'.
4286 
4287    Output Parameter:
4288 .  A - the matrix
4289 
4290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4293 
4294    Notes:
4295    If the *_nnz parameter is given then the *_nz parameter is ignored
4296 
4297    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4298    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4299    storage requirements for this matrix.
4300 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4304 
4305    The user MUST specify either the local or global matrix dimensions
4306    (possibly both).
4307 
4308    The parallel matrix is partitioned across processors such that the
4309    first m0 rows belong to process 0, the next m1 rows belong to
4310    process 1, the next m2 rows belong to process 2 etc.. where
4311    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4312    values corresponding to [m x N] submatrix.
4313 
4314    The columns are logically partitioned with the n0 columns belonging
4315    to 0th partition, the next n1 columns belonging to the next
4316    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4317 
4318    The DIAGONAL portion of the local submatrix on any given processor
4319    is the submatrix corresponding to the rows and columns m,n
4320    corresponding to the given processor. i.e diagonal matrix on
4321    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4322    etc. The remaining portion of the local submatrix [m x (N-n)]
4323    constitute the OFF-DIAGONAL portion. The example below better
4324    illustrates this concept.
4325 
4326    For a square global matrix we define each processor's diagonal portion
4327    to be its local rows and the corresponding columns (a square submatrix);
4328    each processor's off-diagonal portion encompasses the remainder of the
4329    local matrix (a rectangular submatrix).
4330 
4331    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4332 
4333    When calling this routine with a single process communicator, a matrix of
4334    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4335    type of communicator, use the construction mechanism
4336 .vb
4337      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4338 .ve
4339 
4340 $     MatCreate(...,&A);
4341 $     MatSetType(A,MATMPIAIJ);
4342 $     MatSetSizes(A, m,n,M,N);
4343 $     MatMPIAIJSetPreallocation(A,...);
4344 
4345    By default, this format uses inodes (identical nodes) when possible.
4346    We search for consecutive rows with the same nonzero structure, thereby
4347    reusing matrix information to achieve increased efficiency.
4348 
4349    Options Database Keys:
4350 +  -mat_no_inode  - Do not use inodes
4351 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4352 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4353         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4354         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4355 
4356    Example usage:
4357 
4358    Consider the following 8x8 matrix with 34 non-zero values, that is
4359    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4360    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4361    as follows
4362 
4363 .vb
4364             1  2  0  |  0  3  0  |  0  4
4365     Proc0   0  5  6  |  7  0  0  |  8  0
4366             9  0 10  | 11  0  0  | 12  0
4367     -------------------------------------
4368            13  0 14  | 15 16 17  |  0  0
4369     Proc1   0 18  0  | 19 20 21  |  0  0
4370             0  0  0  | 22 23  0  | 24  0
4371     -------------------------------------
4372     Proc2  25 26 27  |  0  0 28  | 29  0
4373            30  0  0  | 31 32 33  |  0 34
4374 .ve
4375 
4376    This can be represented as a collection of submatrices as
4377 
4378 .vb
4379       A B C
4380       D E F
4381       G H I
4382 .ve
4383 
4384    Where the submatrices A,B,C are owned by proc0, D,E,F are
4385    owned by proc1, G,H,I are owned by proc2.
4386 
4387    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4388    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4389    The 'M','N' parameters are 8,8, and have the same values on all procs.
4390 
4391    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4392    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4393    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4394    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4397 
4398    When d_nz, o_nz parameters are specified, d_nz storage elements are
4399    allocated for every row of the local diagonal submatrix, and o_nz
4400    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4402    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4403    In this case, the values of d_nz,o_nz are
4404 .vb
4405      proc0 : dnz = 2, o_nz = 2
4406      proc1 : dnz = 3, o_nz = 2
4407      proc2 : dnz = 1, o_nz = 4
4408 .ve
4409    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4410    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4412    34 values.
4413 
4414    When d_nnz, o_nnz parameters are specified, the storage is specified
4415    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4416    In the above case the values for d_nnz,o_nnz are
4417 .vb
4418      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4419      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4420      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4421 .ve
4422    Here the space allocated is sum of all the above values i.e 34, and
4423    hence pre-allocation is perfect.
4424 
4425    Level: intermediate
4426 
4427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4428           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4429 @*/
4430 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4431 {
4432   PetscErrorCode ierr;
4433   PetscMPIInt    size;
4434 
4435   PetscFunctionBegin;
4436   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4437   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4438   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4439   if (size > 1) {
4440     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4441     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4442   } else {
4443     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4444     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4445   }
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 /*@C
4450   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4451 
4452   Not collective
4453 
4454   Input Parameter:
4455 . A - The MPIAIJ matrix
4456 
4457   Output Parameters:
4458 + Ad - The local diagonal block as a SeqAIJ matrix
4459 . Ao - The local off-diagonal block as a SeqAIJ matrix
4460 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4461 
4462   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4464   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4465   local column numbers to global column numbers in the original matrix.
4466 
4467   Level: intermediate
4468 
4469 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4470 @*/
4471 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4472 {
4473   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4474   PetscBool      flg;
4475   PetscErrorCode ierr;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4479   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4480   if (Ad)     *Ad     = a->A;
4481   if (Ao)     *Ao     = a->B;
4482   if (colmap) *colmap = a->garray;
4483   PetscFunctionReturn(0);
4484 }
4485 
/* Concatenate the local sequential matrices inmat (one per rank of comm, stacked by rows)
   into a single parallel matrix *outmat; n is the local column size or PETSC_DECIDE.
   With MAT_INITIAL_MATRIX the matrix is created and preallocated (symbolic phase);
   with reuse only the numerical values are reinserted (numeric phase). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* first global row of this rank = total rows of the preceding ranks (exclusive prefix sum) */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for exact preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    /* inherit the root type of inmat; both preallocation routines are called since the
       resulting type may be either sequential or parallel (the inapplicable call is ignored) */
    ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    /* every rank only inserts its own rows, so off-process stashing can be skipped */
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: copy the local rows of inmat into the (new or reused) parallel matrix */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4540 
4541 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4542 {
4543   PetscErrorCode    ierr;
4544   PetscMPIInt       rank;
4545   PetscInt          m,N,i,rstart,nnz;
4546   size_t            len;
4547   const PetscInt    *indx;
4548   PetscViewer       out;
4549   char              *name;
4550   Mat               B;
4551   const PetscScalar *values;
4552 
4553   PetscFunctionBegin;
4554   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4555   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4556   /* Should this be the type of the diagonal block of A? */
4557   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4558   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4559   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4560   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4561   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4562   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4563   for (i=0; i<m; i++) {
4564     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4565     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4566     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4567   }
4568   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570 
4571   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4572   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4573   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4574   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4575   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4576   ierr = PetscFree(name);CHKERRQ(ierr);
4577   ierr = MatView(B,out);CHKERRQ(ierr);
4578   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4579   ierr = MatDestroy(&B);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
4582 
/* Container destructor for the Mat_Merge_SeqsToMPI communication plan attached to a matrix
   by MatCreateMPIAIJSumSeqAIJSymbolic(); releases every buffer the plan owns */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  /* message bookkeeping (sender ids, send/receive lengths) */
  ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
  ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
  /* merged ij structure */
  ierr = PetscFree(merge->bi);CHKERRQ(ierr);
  ierr = PetscFree(merge->bj);CHKERRQ(ierr);
  /* buf_ri[0]/buf_rj[0] hold the contiguous data; they must be freed before the
     pointer arrays buf_ri/buf_rj that reference them */
  ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
  ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
  ierr = PetscFree(merge->coi);CHKERRQ(ierr);
  ierr = PetscFree(merge->coj);CHKERRQ(ierr);
  ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscFree(merge);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4606 
4607 #include <../src/mat/utils/freespace.h>
4608 #include <petscbt.h>
4609 
/* Numeric phase of merging per-rank sequential matrices into the parallel matrix mpimat:
   exchanges the numerical values according to the communication plan stored on mpimat by
   MatCreateMPIAIJSumSeqAIJSymbolic(), and accumulates local plus received values row by row */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the communication plan created by the symbolic phase; its absence is a usage error */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of all rows owned by [proc] are contiguous in aa (CSR), so one message per destination */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* single merge pass: assumes aj is sorted and is a subset of bj_i (guaranteed by the symbolic phase) */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        /* advance the cursors of the k-th message past this row */
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] holds the contiguous receive data and must be freed before the pointer array abuf_r */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4727 
4728 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4729 {
4730   PetscErrorCode      ierr;
4731   Mat                 B_mpi;
4732   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4733   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4734   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4735   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4736   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4737   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4738   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4739   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4740   MPI_Status          *status;
4741   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4742   PetscBT             lnkbt;
4743   Mat_Merge_SeqsToMPI *merge;
4744   PetscContainer      container;
4745 
4746   PetscFunctionBegin;
4747   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4748 
4749   /* make sure it is a PETSc comm */
4750   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4751   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4752   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4753 
4754   ierr = PetscNew(&merge);CHKERRQ(ierr);
4755   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4756 
4757   /* determine row ownership */
4758   /*---------------------------------------------------------*/
4759   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4760   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4761   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4762   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4763   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4764   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4765   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4766 
4767   m      = merge->rowmap->n;
4768   owners = merge->rowmap->range;
4769 
4770   /* determine the number of messages to send, their lengths */
4771   /*---------------------------------------------------------*/
4772   len_s = merge->len_s;
4773 
4774   len          = 0; /* length of buf_si[] */
4775   merge->nsend = 0;
4776   for (proc=0; proc<size; proc++) {
4777     len_si[proc] = 0;
4778     if (proc == rank) {
4779       len_s[proc] = 0;
4780     } else {
4781       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4782       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4783     }
4784     if (len_s[proc]) {
4785       merge->nsend++;
4786       nrows = 0;
4787       for (i=owners[proc]; i<owners[proc+1]; i++) {
4788         if (ai[i+1] > ai[i]) nrows++;
4789       }
4790       len_si[proc] = 2*(nrows+1);
4791       len         += len_si[proc];
4792     }
4793   }
4794 
4795   /* determine the number and length of messages to receive for ij-structure */
4796   /*-------------------------------------------------------------------------*/
4797   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4798   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4799 
4800   /* post the Irecv of j-structure */
4801   /*-------------------------------*/
4802   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4803   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4804 
4805   /* post the Isend of j-structure */
4806   /*--------------------------------*/
4807   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4808 
4809   for (proc=0, k=0; proc<size; proc++) {
4810     if (!len_s[proc]) continue;
4811     i    = owners[proc];
4812     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4813     k++;
4814   }
4815 
4816   /* receives and sends of j-structure are complete */
4817   /*------------------------------------------------*/
4818   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4819   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4820 
4821   /* send and recv i-structure */
4822   /*---------------------------*/
4823   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4824   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4825 
4826   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4827   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4828   for (proc=0,k=0; proc<size; proc++) {
4829     if (!len_s[proc]) continue;
4830     /* form outgoing message for i-structure:
4831          buf_si[0]:                 nrows to be sent
4832                [1:nrows]:           row index (global)
4833                [nrows+1:2*nrows+1]: i-structure index
4834     */
4835     /*-------------------------------------------*/
4836     nrows       = len_si[proc]/2 - 1;
4837     buf_si_i    = buf_si + nrows+1;
4838     buf_si[0]   = nrows;
4839     buf_si_i[0] = 0;
4840     nrows       = 0;
4841     for (i=owners[proc]; i<owners[proc+1]; i++) {
4842       anzi = ai[i+1] - ai[i];
4843       if (anzi) {
4844         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4845         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4846         nrows++;
4847       }
4848     }
4849     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4850     k++;
4851     buf_si += len_si[proc];
4852   }
4853 
4854   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4855   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4856 
4857   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4858   for (i=0; i<merge->nrecv; i++) {
4859     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4860   }
4861 
4862   ierr = PetscFree(len_si);CHKERRQ(ierr);
4863   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4864   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4865   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4866   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4867   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4868   ierr = PetscFree(status);CHKERRQ(ierr);
4869 
4870   /* compute a local seq matrix in each processor */
4871   /*----------------------------------------------*/
4872   /* allocate bi array and free space for accumulating nonzero column info */
4873   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4874   bi[0] = 0;
4875 
4876   /* create and initialize a linked list */
4877   nlnk = N+1;
4878   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4879 
4880   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4881   len  = ai[owners[rank+1]] - ai[owners[rank]];
4882   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4883 
4884   current_space = free_space;
4885 
4886   /* determine symbolic info for each local row */
4887   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4888 
4889   for (k=0; k<merge->nrecv; k++) {
4890     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4891     nrows       = *buf_ri_k[k];
4892     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4893     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4894   }
4895 
4896   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4897   len  = 0;
4898   for (i=0; i<m; i++) {
4899     bnzi = 0;
4900     /* add local non-zero cols of this proc's seqmat into lnk */
4901     arow  = owners[rank] + i;
4902     anzi  = ai[arow+1] - ai[arow];
4903     aj    = a->j + ai[arow];
4904     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4905     bnzi += nlnk;
4906     /* add received col data into lnk */
4907     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4908       if (i == *nextrow[k]) { /* i-th row */
4909         anzi  = *(nextai[k]+1) - *nextai[k];
4910         aj    = buf_rj[k] + *nextai[k];
4911         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4912         bnzi += nlnk;
4913         nextrow[k]++; nextai[k]++;
4914       }
4915     }
4916     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4917 
4918     /* if free space is not available, make more free space */
4919     if (current_space->local_remaining<bnzi) {
4920       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4921       nspacedouble++;
4922     }
4923     /* copy data into free space, then initialize lnk */
4924     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4925     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4926 
4927     current_space->array           += bnzi;
4928     current_space->local_used      += bnzi;
4929     current_space->local_remaining -= bnzi;
4930 
4931     bi[i+1] = bi[i] + bnzi;
4932   }
4933 
4934   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4935 
4936   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4937   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4938   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4939 
4940   /* create symbolic parallel matrix B_mpi */
4941   /*---------------------------------------*/
4942   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4943   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4944   if (n==PETSC_DECIDE) {
4945     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4946   } else {
4947     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4948   }
4949   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4950   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4951   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4952   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4953   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4954 
4955   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4956   B_mpi->assembled  = PETSC_FALSE;
4957   merge->bi         = bi;
4958   merge->bj         = bj;
4959   merge->buf_ri     = buf_ri;
4960   merge->buf_rj     = buf_rj;
4961   merge->coi        = NULL;
4962   merge->coj        = NULL;
4963   merge->owners_co  = NULL;
4964 
4965   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4966 
4967   /* attach the supporting struct to B_mpi for reuse */
4968   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4969   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4970   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4971   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4972   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4973   *mpimat = B_mpi;
4974 
4975   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4976   PetscFunctionReturn(0);
4977 }
4978 
4979 /*@C
4980       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4981                  matrices from each processor
4982 
4983     Collective
4984 
4985    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4988 .    m - number of local rows (or PETSC_DECIDE)
4989 .    n - number of local columns (or PETSC_DECIDE)
4990 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4991 
4992    Output Parameter:
4993 .    mpimat - the parallel matrix generated
4994 
4995     Level: advanced
4996 
4997    Notes:
4998      The dimensions of the sequential matrix in each processor MUST be the same.
4999      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5000      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5001 @*/
5002 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5003 {
5004   PetscErrorCode ierr;
5005   PetscMPIInt    size;
5006 
5007   PetscFunctionBegin;
5008   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5009   if (size == 1) {
5010     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5011     if (scall == MAT_INITIAL_MATRIX) {
5012       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5013     } else {
5014       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5015     }
5016     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5017     PetscFunctionReturn(0);
5018   }
5019   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5020   if (scall == MAT_INITIAL_MATRIX) {
5021     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5022   }
5023   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5024   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
5032 
5033     Not Collective
5034 
5035    Input Parameters:
5036 +    A - the matrix
5037 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5038 
5039    Output Parameter:
5040 .    A_loc - the local sequential matrix generated
5041 
5042     Level: developer
5043 
5044    Notes:
5045      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5046      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5047      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5048      modify the values of the returned A_loc.
5049 
5050 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5051 @*/
5052 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5053 {
5054   PetscErrorCode    ierr;
5055   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5056   Mat_SeqAIJ        *mat,*a,*b;
5057   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5058   const PetscScalar *aa,*ba,*aav,*bav;
5059   PetscScalar       *ca,*cam;
5060   PetscMPIInt       size;
5061   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5062   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5063   PetscBool         match;
5064 
5065   PetscFunctionBegin;
5066   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5067   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5068   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5069   if (size == 1) {
5070     if (scall == MAT_INITIAL_MATRIX) {
5071       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5072       *A_loc = mpimat->A;
5073     } else if (scall == MAT_REUSE_MATRIX) {
5074       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5075     }
5076     PetscFunctionReturn(0);
5077   }
5078 
5079   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5080   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5081   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5082   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5083   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5084   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5085   aa   = aav;
5086   ba   = bav;
5087   if (scall == MAT_INITIAL_MATRIX) {
5088     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5089     ci[0] = 0;
5090     for (i=0; i<am; i++) {
5091       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5092     }
5093     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5094     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5095     k    = 0;
5096     for (i=0; i<am; i++) {
5097       ncols_o = bi[i+1] - bi[i];
5098       ncols_d = ai[i+1] - ai[i];
5099       /* off-diagonal portion of A */
5100       for (jo=0; jo<ncols_o; jo++) {
5101         col = cmap[*bj];
5102         if (col >= cstart) break;
5103         cj[k]   = col; bj++;
5104         ca[k++] = *ba++;
5105       }
5106       /* diagonal portion of A */
5107       for (j=0; j<ncols_d; j++) {
5108         cj[k]   = cstart + *aj++;
5109         ca[k++] = *aa++;
5110       }
5111       /* off-diagonal portion of A */
5112       for (j=jo; j<ncols_o; j++) {
5113         cj[k]   = cmap[*bj++];
5114         ca[k++] = *ba++;
5115       }
5116     }
5117     /* put together the new matrix */
5118     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5119     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5120     /* Since these are PETSc arrays, change flags to free them as necessary. */
5121     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5122     mat->free_a  = PETSC_TRUE;
5123     mat->free_ij = PETSC_TRUE;
5124     mat->nonew   = 0;
5125   } else if (scall == MAT_REUSE_MATRIX) {
5126     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5127 #if defined(PETSC_USE_DEVICE)
5128     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5129 #endif
5130     ci = mat->i; cj = mat->j; cam = mat->a;
5131     for (i=0; i<am; i++) {
5132       /* off-diagonal portion of A */
5133       ncols_o = bi[i+1] - bi[i];
5134       for (jo=0; jo<ncols_o; jo++) {
5135         col = cmap[*bj];
5136         if (col >= cstart) break;
5137         *cam++ = *ba++; bj++;
5138       }
5139       /* diagonal portion of A */
5140       ncols_d = ai[i+1] - ai[i];
5141       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5142       /* off-diagonal portion of A */
5143       for (j=jo; j<ncols_o; j++) {
5144         *cam++ = *ba++; bj++;
5145       }
5146     }
5147   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5148   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5149   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5150   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5151   PetscFunctionReturn(0);
5152 }
5153 
5154 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and offdiagonal part
5157 
5158     Not Collective
5159 
5160    Input Parameters:
5161 +    A - the matrix
5162 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5163 
5164    Output Parameters:
5165 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5166 -    A_loc - the local sequential matrix generated
5167 
5168     Level: developer
5169 
5170    Notes:
     This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5172 
5173 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5174 
5175 @*/
5176 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5177 {
5178   PetscErrorCode ierr;
5179   Mat            Ao,Ad;
5180   const PetscInt *cmap;
5181   PetscMPIInt    size;
5182   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5183 
5184   PetscFunctionBegin;
5185   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5186   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5187   if (size == 1) {
5188     if (scall == MAT_INITIAL_MATRIX) {
5189       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5190       *A_loc = Ad;
5191     } else if (scall == MAT_REUSE_MATRIX) {
5192       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5193     }
5194     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5195     PetscFunctionReturn(0);
5196   }
5197   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5198   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5199   if (f) {
5200     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5201   } else {
5202     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5203     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5204     Mat_SeqAIJ        *c;
5205     PetscInt          *ai = a->i, *aj = a->j;
5206     PetscInt          *bi = b->i, *bj = b->j;
5207     PetscInt          *ci,*cj;
5208     const PetscScalar *aa,*ba;
5209     PetscScalar       *ca;
5210     PetscInt          i,j,am,dn,on;
5211 
5212     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5213     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5214     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5215     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5216     if (scall == MAT_INITIAL_MATRIX) {
5217       PetscInt k;
5218       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5219       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5220       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5221       ci[0] = 0;
5222       for (i=0,k=0; i<am; i++) {
5223         const PetscInt ncols_o = bi[i+1] - bi[i];
5224         const PetscInt ncols_d = ai[i+1] - ai[i];
5225         ci[i+1] = ci[i] + ncols_o + ncols_d;
5226         /* diagonal portion of A */
5227         for (j=0; j<ncols_d; j++,k++) {
5228           cj[k] = *aj++;
5229           ca[k] = *aa++;
5230         }
5231         /* off-diagonal portion of A */
5232         for (j=0; j<ncols_o; j++,k++) {
5233           cj[k] = dn + *bj++;
5234           ca[k] = *ba++;
5235         }
5236       }
5237       /* put together the new matrix */
5238       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5239       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5240       /* Since these are PETSc arrays, change flags to free them as necessary. */
5241       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5242       c->free_a  = PETSC_TRUE;
5243       c->free_ij = PETSC_TRUE;
5244       c->nonew   = 0;
5245       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5246     } else if (scall == MAT_REUSE_MATRIX) {
5247 #if defined(PETSC_HAVE_DEVICE)
5248       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5249 #endif
5250       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5251       ca = c->a;
5252       for (i=0; i<am; i++) {
5253         const PetscInt ncols_d = ai[i+1] - ai[i];
5254         const PetscInt ncols_o = bi[i+1] - bi[i];
5255         /* diagonal portion of A */
5256         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5257         /* off-diagonal portion of A */
5258         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5259       }
5260     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5261     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5262     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5263     if (glob) {
5264       PetscInt cst, *gidx;
5265 
5266       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5267       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5268       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5269       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5270       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5271     }
5272   }
5273   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5274   PetscFunctionReturn(0);
5275 }
5276 
5277 /*@C
5278      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5279 
5280     Not Collective
5281 
5282    Input Parameters:
5283 +    A - the matrix
5284 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5285 -    row, col - index sets of rows and columns to extract (or NULL)
5286 
5287    Output Parameter:
5288 .    A_loc - the local sequential matrix generated
5289 
5290     Level: developer
5291 
5292 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5293 
5294 @*/
5295 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5296 {
5297   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5298   PetscErrorCode ierr;
5299   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5300   IS             isrowa,iscola;
5301   Mat            *aloc;
5302   PetscBool      match;
5303 
5304   PetscFunctionBegin;
5305   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5306   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5307   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5308   if (!row) {
5309     start = A->rmap->rstart; end = A->rmap->rend;
5310     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5311   } else {
5312     isrowa = *row;
5313   }
5314   if (!col) {
5315     start = A->cmap->rstart;
5316     cmap  = a->garray;
5317     nzA   = a->A->cmap->n;
5318     nzB   = a->B->cmap->n;
5319     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5320     ncols = 0;
5321     for (i=0; i<nzB; i++) {
5322       if (cmap[i] < start) idx[ncols++] = cmap[i];
5323       else break;
5324     }
5325     imark = i;
5326     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5327     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5328     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5329   } else {
5330     iscola = *col;
5331   }
5332   if (scall != MAT_INITIAL_MATRIX) {
5333     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5334     aloc[0] = *A_loc;
5335   }
5336   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5337   if (!col) { /* attach global id of condensed columns */
5338     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5339   }
5340   *A_loc = aloc[0];
5341   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5342   if (!row) {
5343     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5344   }
5345   if (!col) {
5346     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5347   }
5348   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5349   PetscFunctionReturn(0);
5350 }
5351 
5352 /*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the
 * row is matched. Rows may be local or remote. The routine is designed to be scalable in memory so
 * that nothing allocated here is based on a global size.
5356  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots (rows of P owned by this process)
   * nrows is the number of leaves (rows requested through 'rows')
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find the owning rank and local index on the owner for each requested row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns there are for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* Per-row counts and running offsets, stored as (diag,offdiag) pairs so a single
   * MPIU_2INT broadcast moves both at once */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute running offsets so we know the relative location of each row's entries */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf; both broadcasts are started before
   * either is ended so the two messages can be in flight simultaneously */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right global number of columns (ncol, the
   * longest received row, is enough) since all the calculations will be done by
   * going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Build column-level SF graphs: one leaf per received matrix entry */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous
       * value array; ntotalcols numbers diag and off-diag entries consecutively */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth as leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data to save memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (temporarily; undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth to store global column indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* Restore po->j to local indices; every entry must map back (IS_GTOLM_DROP drops none here) */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking the memory during assembly */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5521 
5522 /*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5524  * This supports MPIAIJ and MAIJ
5525  * */
5526 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5527 {
5528   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5529   Mat_SeqAIJ            *p_oth;
5530   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5531   IS                    rows,map;
5532   PetscHMapI            hamp;
5533   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5534   MPI_Comm              comm;
5535   PetscSF               sf,osf;
5536   PetscBool             has;
5537   PetscErrorCode        ierr;
5538 
5539   PetscFunctionBegin;
5540   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5541   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5542   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5543    *  and then create a submatrix (that often is an overlapping matrix)
5544    * */
5545   if (reuse == MAT_INITIAL_MATRIX) {
5546     /* Use a hash table to figure out unique keys */
5547     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5548     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5549     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5550     count = 0;
5551     /* Assume that  a->g is sorted, otherwise the following does not make sense */
5552     for (i=0;i<a->B->cmap->n;i++) {
5553       key  = a->garray[i]/dof;
5554       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5555       if (!has) {
5556         mapping[i] = count;
5557         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5558       } else {
5559         /* Current 'i' has the same value the previous step */
5560         mapping[i] = count-1;
5561       }
5562     }
5563     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5564     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5565     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5566     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5567     off = 0;
5568     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5569     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5570     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5571     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5572     /* In case, the matrix was already created but users want to recreate the matrix */
5573     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5574     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5575     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5576     ierr = ISDestroy(&map);CHKERRQ(ierr);
5577     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5578   } else if (reuse == MAT_REUSE_MATRIX) {
5579     /* If matrix was already created, we simply update values using SF objects
5580      * that as attached to the matrix ealier.
5581      *  */
5582     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5583     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5584     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5585     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5586     /* Update values in place */
5587     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5588     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5589     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5590     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5591   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5592   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5593   PetscFunctionReturn(0);
5594 }
5595 
5596 /*@C
5597     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5598 
5599     Collective on Mat
5600 
5601    Input Parameters:
5602 +    A - the first matrix in mpiaij format
5603 .    B - the second matrix in mpiaij format
5604 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5605 
5606    Input/Output Parameters:
+    rowb - index set of rows of B to extract (or NULL), modified on output
-    colb - index set of columns of B to extract (or NULL), modified on output
5609 
5610    Output Parameter:
5611 .    B_seq - the sequential matrix generated
5612 
5613     Level: developer
5614 
5615 @*/
5616 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5617 {
5618   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5619   PetscErrorCode ierr;
5620   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5621   IS             isrowb,iscolb;
5622   Mat            *bseq=NULL;
5623 
5624   PetscFunctionBegin;
5625   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5626     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5627   }
5628   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5629 
5630   if (scall == MAT_INITIAL_MATRIX) {
5631     start = A->cmap->rstart;
5632     cmap  = a->garray;
5633     nzA   = a->A->cmap->n;
5634     nzB   = a->B->cmap->n;
5635     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5636     ncols = 0;
5637     for (i=0; i<nzB; i++) {  /* row < local row index */
5638       if (cmap[i] < start) idx[ncols++] = cmap[i];
5639       else break;
5640     }
5641     imark = i;
5642     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5643     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5644     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5645     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5646   } else {
5647     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5648     isrowb  = *rowb; iscolb = *colb;
5649     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5650     bseq[0] = *B_seq;
5651   }
5652   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5653   *B_seq = bseq[0];
5654   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5655   if (!rowb) {
5656     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5657   } else {
5658     *rowb = isrowb;
5659   }
5660   if (!colb) {
5661     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5662   } else {
5663     *colb = iscolb;
5664   }
5665   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5666   PetscFunctionReturn(0);
5667 }
5668 
5669 /*
5670     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5671     of the OFF-DIAGONAL portion of local A
5672 
5673     Collective on Mat
5674 
5675    Input Parameters:
5676 +    A,B - the matrices in mpiaij format
5677 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5678 
5679    Output Parameter:
5680 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5681 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5682 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5683 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5684 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5687 
5688     Level: developer
5689 
5690 */
5691 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5692 {
5693   PetscErrorCode         ierr;
5694   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5695   Mat_SeqAIJ             *b_oth;
5696   VecScatter             ctx;
5697   MPI_Comm               comm;
5698   const PetscMPIInt      *rprocs,*sprocs;
5699   const PetscInt         *srow,*rstarts,*sstarts;
5700   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5701   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5702   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5703   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5704   PetscMPIInt            size,tag,rank,nreqs;
5705 
5706   PetscFunctionBegin;
5707   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5708   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5709 
5710   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5711     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5712   }
5713   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5714   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5715 
5716   if (size == 1) {
5717     startsj_s = NULL;
5718     bufa_ptr  = NULL;
5719     *B_oth    = NULL;
5720     PetscFunctionReturn(0);
5721   }
5722 
5723   ctx = a->Mvctx;
5724   tag = ((PetscObject)ctx)->tag;
5725 
5726   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5727   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5728   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5729   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5730   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5731   rwaits = reqs;
5732   swaits = reqs + nrecvs;
5733 
5734   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5735   if (scall == MAT_INITIAL_MATRIX) {
5736     /* i-array */
5737     /*---------*/
5738     /*  post receives */
5739     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5740     for (i=0; i<nrecvs; i++) {
5741       rowlen = rvalues + rstarts[i]*rbs;
5742       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5743       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5744     }
5745 
5746     /* pack the outgoing message */
5747     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5748 
5749     sstartsj[0] = 0;
5750     rstartsj[0] = 0;
5751     len         = 0; /* total length of j or a array to be sent */
5752     if (nsends) {
5753       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5754       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5755     }
5756     for (i=0; i<nsends; i++) {
5757       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5758       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5759       for (j=0; j<nrows; j++) {
5760         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5761         for (l=0; l<sbs; l++) {
5762           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5763 
5764           rowlen[j*sbs+l] = ncols;
5765 
5766           len += ncols;
5767           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5768         }
5769         k++;
5770       }
5771       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5772 
5773       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5774     }
5775     /* recvs and sends of i-array are completed */
5776     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5777     ierr = PetscFree(svalues);CHKERRQ(ierr);
5778 
5779     /* allocate buffers for sending j and a arrays */
5780     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5781     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5782 
5783     /* create i-array of B_oth */
5784     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5785 
5786     b_othi[0] = 0;
5787     len       = 0; /* total length of j or a array to be received */
5788     k         = 0;
5789     for (i=0; i<nrecvs; i++) {
5790       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5791       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5792       for (j=0; j<nrows; j++) {
5793         b_othi[k+1] = b_othi[k] + rowlen[j];
5794         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5795         k++;
5796       }
5797       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5798     }
5799     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5800 
5801     /* allocate space for j and a arrrays of B_oth */
5802     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5803     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5804 
5805     /* j-array */
5806     /*---------*/
5807     /*  post receives of j-array */
5808     for (i=0; i<nrecvs; i++) {
5809       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5810       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5811     }
5812 
5813     /* pack the outgoing message j-array */
5814     if (nsends) k = sstarts[0];
5815     for (i=0; i<nsends; i++) {
5816       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5817       bufJ  = bufj+sstartsj[i];
5818       for (j=0; j<nrows; j++) {
5819         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5820         for (ll=0; ll<sbs; ll++) {
5821           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5822           for (l=0; l<ncols; l++) {
5823             *bufJ++ = cols[l];
5824           }
5825           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5826         }
5827       }
5828       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5829     }
5830 
5831     /* recvs and sends of j-array are completed */
5832     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5833   } else if (scall == MAT_REUSE_MATRIX) {
5834     sstartsj = *startsj_s;
5835     rstartsj = *startsj_r;
5836     bufa     = *bufa_ptr;
5837     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5838     b_otha   = b_oth->a;
5839 #if defined(PETSC_HAVE_DEVICE)
5840     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5841 #endif
5842   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5843 
5844   /* a-array */
5845   /*---------*/
5846   /*  post receives of a-array */
5847   for (i=0; i<nrecvs; i++) {
5848     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5849     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5850   }
5851 
5852   /* pack the outgoing message a-array */
5853   if (nsends) k = sstarts[0];
5854   for (i=0; i<nsends; i++) {
5855     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5856     bufA  = bufa+sstartsj[i];
5857     for (j=0; j<nrows; j++) {
5858       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5859       for (ll=0; ll<sbs; ll++) {
5860         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5861         for (l=0; l<ncols; l++) {
5862           *bufA++ = vals[l];
5863         }
5864         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5865       }
5866     }
5867     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5868   }
5869   /* recvs and sends of a-array are completed */
5870   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5871   ierr = PetscFree(reqs);CHKERRQ(ierr);
5872 
5873   if (scall == MAT_INITIAL_MATRIX) {
5874     /* put together the new matrix */
5875     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5876 
5877     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5878     /* Since these are PETSc arrays, change flags to free them as necessary. */
5879     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5880     b_oth->free_a  = PETSC_TRUE;
5881     b_oth->free_ij = PETSC_TRUE;
5882     b_oth->nonew   = 0;
5883 
5884     ierr = PetscFree(bufj);CHKERRQ(ierr);
5885     if (!startsj_s || !bufa_ptr) {
5886       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5887       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5888     } else {
5889       *startsj_s = sstartsj;
5890       *startsj_r = rstartsj;
5891       *bufa_ptr  = bufa;
5892     }
5893   }
5894 
5895   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5896   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5897   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5898   PetscFunctionReturn(0);
5899 }
5900 
5901 /*@C
5902   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5903 
5904   Not Collective
5905 
5906   Input Parameter:
5907 . A - The matrix in mpiaij format
5908 
5909   Output Parameters:
5910 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5911 . colmap - A map from global column index to local index into lvec
5912 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5913 
5914   Level: developer
5915 
5916 @*/
5917 #if defined(PETSC_USE_CTABLE)
5918 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5919 #else
5920 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5921 #endif
5922 {
5923   Mat_MPIAIJ *a;
5924 
5925   PetscFunctionBegin;
5926   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5927   PetscValidPointer(lvec, 2);
5928   PetscValidPointer(colmap, 3);
5929   PetscValidPointer(multScatter, 4);
5930   a = (Mat_MPIAIJ*) A->data;
5931   if (lvec) *lvec = a->lvec;
5932   if (colmap) *colmap = a->colmap;
5933   if (multScatter) *multScatter = a->Mvctx;
5934   PetscFunctionReturn(0);
5935 }
5936 
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5940 #if defined(PETSC_HAVE_MKL_SPARSE)
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5942 #endif
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5945 #if defined(PETSC_HAVE_ELEMENTAL)
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5947 #endif
5948 #if defined(PETSC_HAVE_SCALAPACK)
5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5950 #endif
5951 #if defined(PETSC_HAVE_HYPRE)
5952 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5953 #endif
5954 #if defined(PETSC_HAVE_CUDA)
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5956 #endif
5957 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5959 #endif
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5961 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5962 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5963 
5964 /*
5965     Computes (B'*A')' since computing B*A directly is untenable
5966 
5967                n                       p                          p
5968         [             ]       [             ]         [                 ]
5969       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5970         [             ]       [             ]         [                 ]
5971 
5972 */
5973 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5974 {
5975   PetscErrorCode ierr;
5976   Mat            At,Bt,Ct;
5977 
5978   PetscFunctionBegin;
5979   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5980   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5981   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5982   ierr = MatDestroy(&At);CHKERRQ(ierr);
5983   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5984   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5985   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5986   PetscFunctionReturn(0);
5987 }
5988 
5989 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5990 {
5991   PetscErrorCode ierr;
5992   PetscBool      cisdense;
5993 
5994   PetscFunctionBegin;
5995   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5996   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5997   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5998   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5999   if (!cisdense) {
6000     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6001   }
6002   ierr = MatSetUp(C);CHKERRQ(ierr);
6003 
6004   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6005   PetscFunctionReturn(0);
6006 }
6007 
6008 /* ----------------------------------------------------------------*/
6009 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6010 {
6011   Mat_Product *product = C->product;
6012   Mat         A = product->A,B=product->B;
6013 
6014   PetscFunctionBegin;
6015   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6016     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6017 
6018   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6019   C->ops->productsymbolic = MatProductSymbolic_AB;
6020   PetscFunctionReturn(0);
6021 }
6022 
6023 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6024 {
6025   PetscErrorCode ierr;
6026   Mat_Product    *product = C->product;
6027 
6028   PetscFunctionBegin;
6029   if (product->type == MATPRODUCT_AB) {
6030     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6031   }
6032   PetscFunctionReturn(0);
6033 }
6034 /* ----------------------------------------------------------------*/
6035 
6036 /*MC
6037    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6038 
6039    Options Database Keys:
6040 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6041 
6042    Level: beginner
6043 
6044    Notes:
6045     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6046     in this case the values associated with the rows and columns one passes in are set to zero
6047     in the matrix
6048 
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6051 
6052 .seealso: MatCreateAIJ()
6053 M*/
6054 
/* Type constructor for MATMPIAIJ: installs the function table, creates the
   communication stash, zeroes the per-type data, and registers the
   conversion/product/preallocation plugins via PetscObjectComposeFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* allocate the type-specific data and install the MPIAIJ method table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* preallocation, store/retrieve, and scaling hooks */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to the other AIJ-family and dense/external formats; device and
     third-party backends are registered only when configured in */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  /* matrix-product dispatch hooks */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6133 
6134 /*@C
6135      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6136          and "off-diagonal" part of the matrix in CSR format.
6137 
6138    Collective
6139 
6140    Input Parameters:
6141 +  comm - MPI communicator
6142 .  m - number of local rows (Cannot be PETSC_DECIDE)
6143 .  n - This value should be the same as the local size used in creating the
6144        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6145        calculated if N is given) For square matrices n is almost always m.
6146 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6147 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6148 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6149 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6150 .   a - matrix values
6151 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6152 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6153 -   oa - matrix values
6154 
6155    Output Parameter:
6156 .   mat - the matrix
6157 
6158    Level: advanced
6159 
6160    Notes:
6161        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6162        must free the arrays once the matrix has been destroyed and not before.
6163 
6164        The i and j indices are 0 based
6165 
6166        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6167 
6168        This sets local rows and cannot be used to set off-processor values.
6169 
6170        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6171        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6172        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6173        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6174        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6175        communication if it is known that only local entries will be set.
6176 
6177 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6178           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6179 @*/
6180 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6181 {
6182   PetscErrorCode ierr;
6183   Mat_MPIAIJ     *maij;
6184 
6185   PetscFunctionBegin;
6186   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6187   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6188   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6191   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6192   maij = (Mat_MPIAIJ*) (*mat)->data;
6193 
6194   (*mat)->preallocated = PETSC_TRUE;
6195 
6196   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6197   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6198 
6199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6201 
6202   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6203   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6204   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6205   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6206   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6207   PetscFunctionReturn(0);
6208 }
6209 
6210 /*
6211     Special version for direct calls from Fortran
6212 */
6213 #include <petsc/private/fortranimpl.h>
6214 
/* Change these macros so can be used in void function */
/* NOTE(review): these redefinitions are not restored later in this file, so every
   routine defined below this point aborts (via CHKERRABORT) on error instead of
   returning an error code to its caller -- confirm this is intended */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

/* Map the C symbol to the name-mangling scheme the Fortran compiler expects */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   Fortran stub for MatSetValues() specialized to MATMPIAIJ matrices.

   All arguments arrive by reference (Fortran calling convention):
     mmat  - the MATMPIAIJ matrix
     mm/im - number of rows and (global) row indices
     mn/in - number of columns and (global) column indices
     v     - the m*n values (layout depends on aij->roworiented)
     maddv - ADD_VALUES or INSERT_VALUES
     _ierr - output error code slot (unused here; errors abort via the
             redefined CHKERRQ/SETERRQ macros above, since this function
             returns void)

   The body mirrors MatSetValues_MPIAIJ(): on-process entries go into the
   diagonal block A or off-diagonal block B via the
   MatSetValues_SeqAIJ_{A,B}_Private() macros, off-process rows are stashed
   for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                    = aij->A;
    Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa                  = a->a;
    PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                    = aij->B;
    Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba                  = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* binary-search window state consumed/updated by the _Private() macros */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* row is owned by this process: reset the per-row search windows for A and B */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          /* never drop a zero on the diagonal -- it may be needed by factorizations */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            /* column in the off-diagonal block: translate global -> local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* column not present in B yet: disassemble so a new entry can be inserted */
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ*)B->data;
                bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during MatAssemblyBegin/End */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
6347 
/* Product data attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND();
   freed by MatDestroy_MatMatMPIAIJBACKEND(). The symbolic phase decomposes the
   MPI product into a series of sequential intermediate products mp[0..cp-1] and
   records, per intermediate matrix, where each of its nonzeros lands in C (local
   vs off-process) so the numeric phase can fill C with a single MatSetValuesCOO(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth (reused with MAT_REUSE_MATRIX in the numeric phase) */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] owns the backing array */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] owns the backing array */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of coo_v/coo_w (host or device) */

  /* customization */
  PetscBool abmerge;    /* for AB: multiply A_diag by the merged local B instead of B's diag/off-diag separately */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6378 
6379 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6380 {
6381   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6382   PetscInt            i;
6383   PetscErrorCode      ierr;
6384 
6385   PetscFunctionBegin;
6386   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6387   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6388   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6389   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6390   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6391   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6392   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6393   for (i = 0; i < mmdata->cp; i++) {
6394     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6395   }
6396   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6399   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6400   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6401   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6402   PetscFunctionReturn(0);
6403 }
6404 
6405 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6406 {
6407   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6408   PetscErrorCode ierr;
6409 
6410   PetscFunctionBegin;
6411   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6412   if (f) {
6413     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6414   } else {
6415     const PetscScalar *vv;
6416 
6417     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6418     if (n && idx) {
6419       PetscScalar    *w = v;
6420       const PetscInt *oi = idx;
6421       PetscInt       j;
6422 
6423       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6424     } else {
6425       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6426     }
6427     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6428   }
6429   PetscFunctionReturn(0);
6430 }
6431 
/* Numeric phase of the backend MatProduct: refresh the intermediate sequential
   products built by the symbolic phase, collect their values in COO order
   (splitting locally-owned from off-process entries), communicate the
   off-process part through the SF, and insert everything into C with a single
   MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (local) and coo_w (off-process) */
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* reusesym only skips the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  /* recompute every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* gather values: off-process entries into coo_w, locally-owned into coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; /* # of off-process entries of mp[i] */

    if (mmdata->mptmp[i]) continue; /* temporary products feed later ones; their values are not inserted */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; /* # of locally-owned entries of mp[i] */

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries: all of mp[i]'s nonzeros are copied in CSR order */
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received values are appended to coo_v after the n_d local ones */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6481 
6482 /* Support for Pt * A, A * P, or Pt * A * P */
6483 #define MAX_NUMBER_INTERMEDIATE 4
6484 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6485 {
6486   Mat_Product            *product = C->product;
6487   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6488   Mat_MPIAIJ             *a,*p;
6489   MatMatMPIAIJBACKEND    *mmdata;
6490   ISLocalToGlobalMapping P_oth_l2g = NULL;
6491   IS                     glob = NULL;
6492   const char             *prefix;
6493   char                   pprefix[256];
6494   const PetscInt         *globidx,*P_oth_idx;
6495   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6496   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6497                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6498                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6499   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6500 
6501   MatProductType         ptype;
6502   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6503   PetscMPIInt            size;
6504   PetscErrorCode         ierr;
6505 
6506   PetscFunctionBegin;
6507   MatCheckProduct(C,1);
6508   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6509   ptype = product->type;
6510   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6511     ptype = MATPRODUCT_AB;
6512     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6513   }
6514   switch (ptype) {
6515   case MATPRODUCT_AB:
6516     A = product->A;
6517     P = product->B;
6518     m = A->rmap->n;
6519     n = P->cmap->n;
6520     M = A->rmap->N;
6521     N = P->cmap->N;
6522     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6523     break;
6524   case MATPRODUCT_AtB:
6525     P = product->A;
6526     A = product->B;
6527     m = P->cmap->n;
6528     n = A->cmap->n;
6529     M = P->cmap->N;
6530     N = A->cmap->N;
6531     hasoffproc = PETSC_TRUE;
6532     break;
6533   case MATPRODUCT_PtAP:
6534     A = product->A;
6535     P = product->B;
6536     m = P->cmap->n;
6537     n = P->cmap->n;
6538     M = P->cmap->N;
6539     N = P->cmap->N;
6540     hasoffproc = PETSC_TRUE;
6541     break;
6542   default:
6543     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6544   }
6545   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6546   if (size == 1) hasoffproc = PETSC_FALSE;
6547 
6548   /* defaults */
6549   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6550     mp[i]    = NULL;
6551     mptmp[i] = PETSC_FALSE;
6552     rmapt[i] = -1;
6553     cmapt[i] = -1;
6554     rmapa[i] = NULL;
6555     cmapa[i] = NULL;
6556   }
6557 
6558   /* customization */
6559   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6560   mmdata->reusesym = product->api_user;
6561   if (ptype == MATPRODUCT_AB) {
6562     if (product->api_user) {
6563       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6564       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6565       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6566       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6567     } else {
6568       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6569       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6570       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6571       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6572     }
6573   } else if (ptype == MATPRODUCT_PtAP) {
6574     if (product->api_user) {
6575       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6576       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6577       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6578     } else {
6579       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6580       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6581       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6582     }
6583   }
6584   a = (Mat_MPIAIJ*)A->data;
6585   p = (Mat_MPIAIJ*)P->data;
6586   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6587   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6588   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6589   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6590   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6591 
6592   cp   = 0;
6593   switch (ptype) {
6594   case MATPRODUCT_AB: /* A * P */
6595     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6596 
6597     /* A_diag * P_local (merged or not) */
6598     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6599       /* P is product->B */
6600       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6601       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6602       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6603       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6604       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6605       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6606       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6607       mp[cp]->product->api_user = product->api_user;
6608       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6609       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6610       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6611       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6612       rmapt[cp] = 1;
6613       cmapt[cp] = 2;
6614       cmapa[cp] = globidx;
6615       mptmp[cp] = PETSC_FALSE;
6616       cp++;
6617     } else { /* A_diag * P_diag and A_diag * P_off */
6618       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6619       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6620       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6621       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6622       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6623       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6624       mp[cp]->product->api_user = product->api_user;
6625       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6626       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6627       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6628       rmapt[cp] = 1;
6629       cmapt[cp] = 1;
6630       mptmp[cp] = PETSC_FALSE;
6631       cp++;
6632       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6633       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6634       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6635       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6636       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6637       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6638       mp[cp]->product->api_user = product->api_user;
6639       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6640       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6641       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6642       rmapt[cp] = 1;
6643       cmapt[cp] = 2;
6644       cmapa[cp] = p->garray;
6645       mptmp[cp] = PETSC_FALSE;
6646       cp++;
6647     }
6648 
6649     /* A_off * P_other */
6650     if (mmdata->P_oth) {
6651       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6652       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6653       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6654       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6655       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6656       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6657       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6658       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6659       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6660       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6661       mp[cp]->product->api_user = product->api_user;
6662       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6663       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6664       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6665       rmapt[cp] = 1;
6666       cmapt[cp] = 2;
6667       cmapa[cp] = P_oth_idx;
6668       mptmp[cp] = PETSC_FALSE;
6669       cp++;
6670     }
6671     break;
6672 
6673   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6674     /* A is product->B */
6675     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6676     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6677       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6678       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6679       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6680       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6681       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6682       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6683       mp[cp]->product->api_user = product->api_user;
6684       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6685       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6686       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6687       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6688       rmapt[cp] = 2;
6689       rmapa[cp] = globidx;
6690       cmapt[cp] = 2;
6691       cmapa[cp] = globidx;
6692       mptmp[cp] = PETSC_FALSE;
6693       cp++;
6694     } else {
6695       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6696       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6697       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6698       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6699       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6700       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6701       mp[cp]->product->api_user = product->api_user;
6702       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6703       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6704       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6705       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6706       rmapt[cp] = 1;
6707       cmapt[cp] = 2;
6708       cmapa[cp] = globidx;
6709       mptmp[cp] = PETSC_FALSE;
6710       cp++;
6711       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6712       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6713       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6714       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6715       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6716       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6717       mp[cp]->product->api_user = product->api_user;
6718       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6719       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6720       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6721       rmapt[cp] = 2;
6722       rmapa[cp] = p->garray;
6723       cmapt[cp] = 2;
6724       cmapa[cp] = globidx;
6725       mptmp[cp] = PETSC_FALSE;
6726       cp++;
6727     }
6728     break;
6729   case MATPRODUCT_PtAP:
6730     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6731     /* P is product->B */
6732     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6733     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6734     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6735     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6736     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6737     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6738     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6739     mp[cp]->product->api_user = product->api_user;
6740     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6741     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6742     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6743     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6744     rmapt[cp] = 2;
6745     rmapa[cp] = globidx;
6746     cmapt[cp] = 2;
6747     cmapa[cp] = globidx;
6748     mptmp[cp] = PETSC_FALSE;
6749     cp++;
6750     if (mmdata->P_oth) {
6751       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6752       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6753       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6754       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6755       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6756       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6757       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6758       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6759       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6760       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6761       mp[cp]->product->api_user = product->api_user;
6762       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6763       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6764       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6765       mptmp[cp] = PETSC_TRUE;
6766       cp++;
6767       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6768       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6769       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6770       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6771       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6772       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6773       mp[cp]->product->api_user = product->api_user;
6774       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6775       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6776       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6777       rmapt[cp] = 2;
6778       rmapa[cp] = globidx;
6779       cmapt[cp] = 2;
6780       cmapa[cp] = P_oth_idx;
6781       mptmp[cp] = PETSC_FALSE;
6782       cp++;
6783     }
6784     break;
6785   default:
6786     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6787   }
6788   /* sanity check */
6789   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6790 
6791   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6792   for (i = 0; i < cp; i++) {
6793     mmdata->mp[i]    = mp[i];
6794     mmdata->mptmp[i] = mptmp[i];
6795   }
6796   mmdata->cp = cp;
6797   C->product->data       = mmdata;
6798   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6799   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6800 
6801   /* memory type */
6802   mmdata->mtype = PETSC_MEMTYPE_HOST;
6803   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6804   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6805   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6807   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6808 
6809   /* prepare coo coordinates for values insertion */
6810 
6811   /* count total nonzeros of those intermediate seqaij Mats
6812     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6813     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6814     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6815   */
6816   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6817     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6818     if (mptmp[cp]) continue;
6819     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
6820       const PetscInt *rmap = rmapa[cp];
6821       const PetscInt mr = mp[cp]->rmap->n;
6822       const PetscInt rs = C->rmap->rstart;
6823       const PetscInt re = C->rmap->rend;
6824       const PetscInt *ii  = mm->i;
6825       for (i = 0; i < mr; i++) {
6826         const PetscInt gr = rmap[i];
6827         const PetscInt nz = ii[i+1] - ii[i];
6828         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6829         else ncoo_oown += nz; /* this row is local */
6830       }
6831     } else ncoo_d += mm->nz;
6832   }
6833 
6834   /*
6835     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6836 
6837     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
6838 
    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
6840 
6841     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
6842     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
6843     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6844 
6845     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
6847   */
6848   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6849   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6850 
6851   /* gather (i,j) of nonzeros inserted by remote procs */
6852   if (hasoffproc) {
6853     PetscSF  msf;
6854     PetscInt ncoo2,*coo_i2,*coo_j2;
6855 
6856     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6857     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6858     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6859 
6860     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6861       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6862       PetscInt   *idxoff = mmdata->off[cp];
6863       PetscInt   *idxown = mmdata->own[cp];
6864       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6865         const PetscInt *rmap = rmapa[cp];
6866         const PetscInt *cmap = cmapa[cp];
6867         const PetscInt *ii  = mm->i;
6868         PetscInt       *coi = coo_i + ncoo_o;
6869         PetscInt       *coj = coo_j + ncoo_o;
6870         const PetscInt mr = mp[cp]->rmap->n;
6871         const PetscInt rs = C->rmap->rstart;
6872         const PetscInt re = C->rmap->rend;
6873         const PetscInt cs = C->cmap->rstart;
6874         for (i = 0; i < mr; i++) {
6875           const PetscInt *jj = mm->j + ii[i];
6876           const PetscInt gr  = rmap[i];
6877           const PetscInt nz  = ii[i+1] - ii[i];
6878           if (gr < rs || gr >= re) { /* this is an offproc row */
6879             for (j = ii[i]; j < ii[i+1]; j++) {
6880               *coi++ = gr;
6881               *idxoff++ = j;
6882             }
6883             if (!cmapt[cp]) { /* already global */
6884               for (j = 0; j < nz; j++) *coj++ = jj[j];
6885             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6886               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6887             } else { /* offdiag */
6888               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6889             }
6890             ncoo_o += nz;
6891           } else { /* this is a local row */
6892             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6893           }
6894         }
6895       }
6896       mmdata->off[cp + 1] = idxoff;
6897       mmdata->own[cp + 1] = idxown;
6898     }
6899 
6900     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6901     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6902     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6903     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6904     ncoo = ncoo_d + ncoo_oown + ncoo2;
6905     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6906     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6907     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6908     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6909     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6910     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6911     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6912     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6913     coo_i = coo_i2;
6914     coo_j = coo_j2;
6915   } else { /* no offproc values insertion */
6916     ncoo = ncoo_d;
6917     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6918 
6919     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6920     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6921     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6922   }
6923   mmdata->hasoffproc = hasoffproc;
6924 
6925    /* gather (i,j) of nonzeros inserted locally */
6926   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6927     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6928     PetscInt       *coi = coo_i + ncoo_d;
6929     PetscInt       *coj = coo_j + ncoo_d;
6930     const PetscInt *jj  = mm->j;
6931     const PetscInt *ii  = mm->i;
6932     const PetscInt *cmap = cmapa[cp];
6933     const PetscInt *rmap = rmapa[cp];
6934     const PetscInt mr = mp[cp]->rmap->n;
6935     const PetscInt rs = C->rmap->rstart;
6936     const PetscInt re = C->rmap->rend;
6937     const PetscInt cs = C->cmap->rstart;
6938 
6939     if (mptmp[cp]) continue;
6940     if (rmapt[cp] == 1) { /* consecutive rows */
6941       /* fill coo_i */
6942       for (i = 0; i < mr; i++) {
6943         const PetscInt gr = i + rs;
6944         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6945       }
6946       /* fill coo_j */
6947       if (!cmapt[cp]) { /* type-0, already global */
6948         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6949       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6950         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6951       } else { /* type-2, local to global for sparse columns */
6952         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6953       }
6954       ncoo_d += mm->nz;
6955     } else if (rmapt[cp] == 2) { /* sparse rows */
6956       for (i = 0; i < mr; i++) {
6957         const PetscInt *jj = mm->j + ii[i];
6958         const PetscInt gr  = rmap[i];
6959         const PetscInt nz  = ii[i+1] - ii[i];
6960         if (gr >= rs && gr < re) { /* local rows */
6961           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6962           if (!cmapt[cp]) { /* type-0, already global */
6963             for (j = 0; j < nz; j++) *coj++ = jj[j];
6964           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6965             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6966           } else { /* type-2, local to global for sparse columns */
6967             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6968           }
6969           ncoo_d += nz;
6970         }
6971       }
6972     }
6973   }
6974   if (glob) {
6975     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6976   }
6977   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6978   if (P_oth_l2g) {
6979     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6980   }
6981   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6982   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6983   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6984 
6985   /* preallocate with COO data */
6986   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6987   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6988   PetscFunctionReturn(0);
6989 }
6990 
6991 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6992 {
6993   Mat_Product    *product = mat->product;
6994   PetscErrorCode ierr;
6995 #if defined(PETSC_HAVE_DEVICE)
6996   PetscBool      match = PETSC_FALSE;
6997   PetscBool      usecpu = PETSC_FALSE;
6998 #else
6999   PetscBool      match = PETSC_TRUE;
7000 #endif
7001 
7002   PetscFunctionBegin;
7003   MatCheckProduct(mat,1);
7004 #if defined(PETSC_HAVE_DEVICE)
7005   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7006     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7007   }
7008   if (match) { /* we can always fallback to the CPU if requested */
7009     switch (product->type) {
7010     case MATPRODUCT_AB:
7011       if (product->api_user) {
7012         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7013         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7014         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7015       } else {
7016         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7017         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7018         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7019       }
7020       break;
7021     case MATPRODUCT_AtB:
7022       if (product->api_user) {
7023         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7024         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7025         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7026       } else {
7027         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7028         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7029         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7030       }
7031       break;
7032     case MATPRODUCT_PtAP:
7033       if (product->api_user) {
7034         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7035         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7036         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7037       } else {
7038         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7039         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7040         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7041       }
7042       break;
7043     default:
7044       break;
7045     }
7046     match = (PetscBool)!usecpu;
7047   }
7048 #endif
7049   if (match) {
7050     switch (product->type) {
7051     case MATPRODUCT_AB:
7052     case MATPRODUCT_AtB:
7053     case MATPRODUCT_PtAP:
7054       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7055       break;
7056     default:
7057       break;
7058     }
7059   }
7060   /* fallback to MPIAIJ ops */
7061   if (!mat->ops->productsymbolic) {
7062     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7063   }
7064   PetscFunctionReturn(0);
7065 }
7066