xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7d5fd1e4d9337468ad3f05b65b7facdcd2dfd2a4)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
/*
   Determines which locally owned rows of M contain at least one numerically nonzero
   entry (in either the diagonal block A or the off-diagonal block B) and returns
   their global indices in *keptrows.  If no process has a zero row, *keptrows is
   left NULL.  Collective on M (performs an Allreduce).
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  /* Get read access to the value arrays (also syncs device data to the host if needed) */
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* First pass: count the locally zero rows in cnt */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero; it will be kept */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    /* all stored entries in this row are numerically zero */
    cnt++;
ok1:;
  }
  /* n0rows = total number of zero rows across all processes */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    /* every row everywhere is nonzero: leave *keptrows NULL */
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* Second pass: collect global indices of the kept (nonzero) rows; m - cnt of them */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
/*
   Computes a per-column reduction (norm, sum of real/imaginary parts, or mean)
   over all rows of the parallel matrix A, returning the n global results in
   reductions[].  Collective on A (performs an Allreduce over all columns).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray; /* garray maps off-diagonal local columns to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  /* work accumulates local contributions for every global column (not scalable in n) */
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* NOTE(review): the get/restore pairs below appear intended to force any device-side
     values to be synchronized to the host before a_aij->a / b_aij->a are read directly
     — confirm against MatSeqAIJGetArrayRead semantics */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root taken after the global sum */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    /* max magnitude per column; combined later with MPIU_MAX instead of MPIU_SUM */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine local contributions across processes */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  /* post-process: finish the 2-norm, or divide sums by the global row count for means */
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
247 {
248   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
249   IS              sis,gis;
250   PetscErrorCode  ierr;
251   const PetscInt  *isis,*igis;
252   PetscInt        n,*iis,nsis,ngis,rstart,i;
253 
254   PetscFunctionBegin;
255   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
256   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
257   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
258   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
259   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
260   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
261 
262   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
263   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
264   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
265   n    = ngis + nsis;
266   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
267   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
268   for (i=0; i<n; i++) iis[i] += rstart;
269   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
270 
271   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
272   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
273   ierr = ISDestroy(&sis);CHKERRQ(ierr);
274   ierr = ISDestroy(&gis);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array but is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i; /* number of off-diagonal (ghost) columns */

  PetscFunctionBegin;
  if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table colmap: entries are stored shifted by +1 because PetscTable cannot hold 0 */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Dense colmap: one entry per global column; 0 means "not present", hence the +1 offset */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
305 
/*
   Inserts or adds value at local (row,col) into the diagonal block A of an MPIAIJ matrix.
   Maintains a cached binary-search window [low1,high1) and lastcol1 to speed up repeated,
   nearly sorted insertions into the same row; jumps to a_noinsert when the value is dropped
   (ignored zero or nonew == 1).  orow/ocol are the global indices, used only in error
   messages.  Relies on rp1, ap1, nrow1, rmax1, aimax, ai, aj, aa, ailen, a, A, am, nonew,
   ignorezeroentries, inserted, ierr, t, N and _i being in scope (see MatSetValues_MPIAIJ).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
343 
/*
   Same as MatSetValues_SeqAIJ_A_Private, but for the off-diagonal block B (search state
   low2/high2/lastcol2, arrays rp2/ap2, label b_noinsert).  Note the ignored-zero test
   here has no row != col clause: B holds only off-diagonal columns, so a diagonal entry
   can never land here.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
380 
/*
   Replaces the values of an entire locally owned row, where v[] holds the row's stored
   entries ordered by global column: the B entries left of the diagonal block first, then
   all A (diagonal block) entries, then the remaining B entries.  The sparsity pattern
   must already exist; only values are copied.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag; /* convert to local row index */
  /* l = number of B entries whose global column precedes the diagonal block
     (garray maps B's local columns to global columns; B's columns are sorted) */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* mark host copy as the fresh one only if something was actually written */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
409 
/*
   MatSetValues implementation for MPIAIJ: locally owned rows are inserted directly
   into the diagonal (A) or off-diagonal (B) sequential block via the
   MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed for
   communication during assembly.  Not collective.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  /* scratch state used by the A/B insertion macros (search windows, temporaries) */
  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* pull any device-resident values back to the host before writing a->a / b->a directly */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro search state for this row in both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column owned locally: goes into the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* off-process column: goes into the off-diagonal block B */
          if (mat->was_assembled) {
            /* B stores compacted local columns after assembly; translate via colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1; /* colmap stores local index + 1; 0 means absent */
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal column: disassemble so B goes back to global column numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
537 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
543 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
544 {
545   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
546   Mat            A           = aij->A; /* diagonal part of the matrix */
547   Mat            B           = aij->B; /* offdiagonal part of the matrix */
548   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
549   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
550   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
551   PetscInt       *ailen      = a->ilen,*aj = a->j;
552   PetscInt       *bilen      = b->ilen,*bj = b->j;
553   PetscInt       am          = aij->A->rmap->n,j;
554   PetscInt       diag_so_far = 0,dnz;
555   PetscInt       offd_so_far = 0,onz;
556 
557   PetscFunctionBegin;
558   /* Iterate over all rows of the matrix */
559   for (j=0; j<am; j++) {
560     dnz = onz = 0;
561     /*  Iterate over all non-zero columns of the current row */
562     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
563       /* If column is in the diagonal */
564       if (mat_j[col] >= cstart && mat_j[col] < cend) {
565         aj[diag_so_far++] = mat_j[col] - cstart;
566         dnz++;
567       } else { /* off-diagonal entries */
568         bj[offd_so_far++] = mat_j[col];
569         onz++;
570       }
571     }
572     ailen[j] = dnz;
573     bilen[j] = onz;
574   }
575   PetscFunctionReturn(0);
576 }
577 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
585 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
586 {
587   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
588   Mat            A      = aij->A; /* diagonal part of the matrix */
589   Mat            B      = aij->B; /* offdiagonal part of the matrix */
590   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
591   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
592   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
593   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
594   PetscInt       *ailen = a->ilen,*aj = a->j;
595   PetscInt       *bilen = b->ilen,*bj = b->j;
596   PetscInt       am     = aij->A->rmap->n,j;
597   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
598   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
599   PetscScalar    *aa = a->a,*ba = b->a;
600 
601   PetscFunctionBegin;
602   /* Iterate over all rows of the matrix */
603   for (j=0; j<am; j++) {
604     dnz_row = onz_row = 0;
605     rowstart_offd = full_offd_i[j];
606     rowstart_diag = full_diag_i[j];
607     /*  Iterate over all non-zero columns of the current row */
608     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
609       /* If column is in the diagonal */
610       if (mat_j[col] >= cstart && mat_j[col] < cend) {
611         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
612         aa[rowstart_diag+dnz_row] = mat_a[col];
613         dnz_row++;
614       } else { /* off-diagonal entries */
615         bj[rowstart_offd+onz_row] = mat_j[col];
616         ba[rowstart_offd+onz_row] = mat_a[col];
617         onz_row++;
618       }
619     }
620     ailen[j] = dnz_row;
621     bilen[j] = onz_row;
622   }
623   PetscFunctionReturn(0);
624 }
625 
/*
   Retrieves values at the requested (row,column) locations into v (row-major,
   v[i*n+j]).  Only locally owned rows may be queried; entries not stored in the
   sparsity pattern come back as 0.0.  Not collective.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* locally owned column: read from the diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-process column: translate global -> B-local via colmap */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1; /* colmap stores local index + 1; 0 means absent */
#endif
          /* column not in B's sparsity pattern: the entry is an implicit zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
665 
666 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
677   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly of a parallel AIJ matrix.

   Drains the stash of off-process entries received during MatAssemblyBegin and
   inserts them locally, then assembles the diagonal (aij->A) and off-diagonal
   (aij->B) sequential blocks, handling disassembly/reassembly and the
   collective nonzero-state update.  Collective on the matrix communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive and insert all stashed (off-process) entries destined for this rank */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff at least one rank disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* row-access workspace (from MatGetRow) is invalidated by assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
765 
766 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
767 {
768   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
773   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
/*
   MatZeroRows_MPIAIJ - Zeroes the (globally indexed) rows in rows[], optionally
   placing diag on the diagonal of each zeroed row and fixing the right-hand
   side b so that b = diag*x on those rows.

   Collective; each rank may pass any subset of global rows, they are mapped to
   their owners internally.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the blocks' nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block, so
       delegate the diagonal insertion to MatZeroRows on mat->A */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* NOTE: nnwA/nnwB save the blocks' 'nonew' flags; nnzA/nnzB hold
       keepnonzeropattern (despite the name suggesting nonzero counts) */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rows beyond the column range have no diagonal entry to set */
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    /* restore the original nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
851 
/*
   MatZeroRowsColumns_MPIAIJ - Zeroes both the rows and columns listed in
   rows[] (global indices), optionally placing diag on the diagonal and
   adjusting the right-hand side b for the eliminated columns.

   Rows are communicated to their owning ranks with a PetscSF; the removed
   columns of the off-diagonal block are located via a scattered 0/1 mask.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  /* reduces the (nonnegative) global indices onto the -1-initialized lrows;
     a root ends up >= 0 exactly when some rank listed that row */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a global 0/1 mask marking zeroed rows, and scatter it to the
     ghost layout so we can recognize eliminated columns of B */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  /* note: n is reused below as a per-row entry count */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the eliminated column's contribution to the right-hand side */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
969 
970 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscInt       nt;
975   VecScatter     Mvctx = a->Mvctx;
976 
977   PetscFunctionBegin;
978   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
979   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
980   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
982   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001   VecScatter     Mvctx = a->Mvctx;
1002 
1003   PetscFunctionBegin;
1004   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1006   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   /* do local part */
1020   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1021   /* add partial results together */
1022   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T to tolerance tol.

   Cheap phase: each rank checks its own diagonal block; a collective AND
   short-circuits if any rank fails.  Expensive phase: the off-owned parts are
   extracted with MatCreateSubMatrices and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* uniprocessor: the diagonal block is the whole matrix, we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = column indices outside this rank's ownership range [first,last) */
  /* NOTE(review): the array is sized N-last+first but the second loop runs to
     M; this only lines up when M == N (square matrix) — TODO confirm callers
     guarantee square/congruent layouts here */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* extract A(Me,Notme) and B(Notme,Me); these should be transposes of each other */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1068 
1069 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1070 {
1071   PetscErrorCode ierr;
1072 
1073   PetscFunctionBegin;
1074   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1079 {
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   /* do nondiagonal part */
1085   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1086   /* do local part */
1087   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1088   /* add partial results together */
1089   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*
1095   This only works correctly for square matrices where the subblock A->A is the
1096    diagonal block
1097 */
1098 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1099 {
1100   PetscErrorCode ierr;
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1105   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1106   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1111 {
1112   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1117   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1122 {
1123   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127 #if defined(PETSC_USE_LOG)
1128   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1129 #endif
1130   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1132   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1133   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1134 #if defined(PETSC_USE_CTABLE)
1135   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1136 #else
1137   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1138 #endif
1139   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1140   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1141   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1142   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1143   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1144   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1145 
1146   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1147   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1148 
1149   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1151   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1153   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1154   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1156   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1159 #if defined(PETSC_HAVE_CUDA)
1160   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1161 #endif
1162 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1164 #endif
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1166 #if defined(PETSC_HAVE_ELEMENTAL)
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1168 #endif
1169 #if defined(PETSC_HAVE_SCALAPACK)
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1171 #endif
1172 #if defined(PETSC_HAVE_HYPRE)
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1174   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1182 #if defined(PETSC_HAVE_MKL_SPARSE)
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1184 #endif
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1192 {
1193   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1194   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1195   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1196   const PetscInt    *garray = aij->garray;
1197   const PetscScalar *aa,*ba;
1198   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1199   PetscInt          *rowlens;
1200   PetscInt          *colidxs;
1201   PetscScalar       *matvals;
1202   PetscErrorCode    ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1206 
1207   M  = mat->rmap->N;
1208   N  = mat->cmap->N;
1209   m  = mat->rmap->n;
1210   rs = mat->rmap->rstart;
1211   cs = mat->cmap->rstart;
1212   nz = A->nz + B->nz;
1213 
1214   /* write matrix header */
1215   header[0] = MAT_FILE_CLASSID;
1216   header[1] = M; header[2] = N; header[3] = nz;
1217   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1218   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1219 
1220   /* fill in and store row lengths  */
1221   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1222   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1223   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1224   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1225 
1226   /* fill in and store column indices */
1227   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1228   for (cnt=0, i=0; i<m; i++) {
1229     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1230       if (garray[B->j[jb]] > cs) break;
1231       colidxs[cnt++] = garray[B->j[jb]];
1232     }
1233     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1234       colidxs[cnt++] = A->j[ja] + cs;
1235     for (; jb<B->i[i+1]; jb++)
1236       colidxs[cnt++] = garray[B->j[jb]];
1237   }
1238   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1239   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1240   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1241 
1242   /* fill in and store nonzero values */
1243   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1244   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1245   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1246   for (cnt=0, i=0; i<m; i++) {
1247     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1248       if (garray[B->j[jb]] > cs) break;
1249       matvals[cnt++] = ba[jb];
1250     }
1251     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1252       matvals[cnt++] = aa[ja];
1253     for (; jb<B->i[i+1]; jb++)
1254       matvals[cnt++] = ba[jb];
1255   }
1256   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1257   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1258   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1259   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1260   ierr = PetscFree(matvals);CHKERRQ(ierr);
1261 
1262   /* write block size option to the viewer's .info file */
1263   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Implements viewing for the ASCII,
   draw, socket and binary viewer types.

   Info-style ASCII formats and binary output are handled with early returns;
   all remaining cases gather the entire matrix onto rank 0 via a submatrix
   and view it there through a subviewer (collective on all ranks).
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* format is re-fetched here (value is unchanged from the call above) */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local sizes, nonzero counts, memory and inode usage */
      MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols, every other rank requests none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (rank == 0) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1396 
1397 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1398 {
1399   PetscErrorCode ierr;
1400   PetscBool      iascii,isdraw,issocket,isbinary;
1401 
1402   PetscFunctionBegin;
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1404   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1407   if (iascii || isdraw || isbinary || issocket) {
1408     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
/*
   SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only *local* sweeps (SOR_LOCAL_FORWARD/BACKWARD/SYMMETRIC_SWEEP),
   SOR_APPLY_UPPER, and the Eisenstat variant are supported; true parallel SOR
   is refused at the bottom.  Each process relaxes with its diagonal block
   mat->A; coupling to off-process unknowns enters through the modified right
   hand side bb1 = bb - B*x, where the required ghost values of xx are gathered
   into mat->lvec via the scatter mat->Mvctx.

   matin  - the matrix
   bb     - right hand side
   omega  - relaxation parameter
   flag   - combination of MatSORType flags
   fshift - diagonal shift
   its    - number of outer iterations
   lits   - local iteration count forwarded to the sequential SOR
   xx     - solution (initial guess on input unless SOR_ZERO_INITIAL_GUESS)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;
  PetscBool      hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER acts only on the local diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* A work vector for the modified rhs is needed whenever more than one outer
     iteration runs, the initial guess is nonzero (~flag & SOR_ZERO_INITIAL_GUESS
     tests that the bit is NOT set), or Eisenstat is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first sweep can use bb unmodified */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather current ghost values of xx into mat->lvec */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    /* backward local sweep from a zero guess */
    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal, needed for the Eisenstat update */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    /* bb1 = D*x, via a dedicated op when the matrix type provides one */
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add coupling from off-process unknowns: bb1 += B*lvec */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* surface the local block's factorization error state on the parallel matrix */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1512 
/*
   Permute rows and columns of a parallel AIJ matrix.

   rowp/colp give, for each locally owned row/column, the global index it
   should be moved to.  The inverse permutations (i.e. the destination of each
   owned row/column) are obtained with PetscSF reductions; nonzero counts per
   destination row are then broadcast back for preallocation, and finally the
   values are shipped row by row with MatSetValues.

   A    - matrix to permute
   rowp - row permutation (local indices -> global destinations)
   colp - column permutation
   B    - the permuted matrix (always MAT_INITIAL_MATRIX here)
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  /* work: scratch of size max(m,n); rdest/cdest: destination of each owned row/column */
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, for each source row, how many entries land in the diagonal (dnnz)
     vs off-diagonal (onnz) block of the destination row's owner */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the processes that own the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never assigned in this function, so this branch
     appears dead -- confirm against callers/history before removing */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscLogDouble isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1679 {
1680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692   case MAT_FORM_EXPLICIT_TRANSPOSE:
1693     MatCheckPreallocated(A,1);
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A,1);
1699     a->roworiented = flg;
1700 
1701     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1702     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1703     break;
1704   case MAT_FORCE_DIAGONAL_ENTRIES:
1705   case MAT_SORTED_FULL:
1706     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1707     break;
1708   case MAT_IGNORE_OFF_PROC_ENTRIES:
1709     a->donotstash = flg;
1710     break;
1711   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1712   case MAT_SPD:
1713   case MAT_SYMMETRIC:
1714   case MAT_STRUCTURALLY_SYMMETRIC:
1715   case MAT_HERMITIAN:
1716   case MAT_SYMMETRY_ETERNAL:
1717     break;
1718   case MAT_SUBMAT_SINGLEIS:
1719     A->submat_singleis = flg;
1720     break;
1721   case MAT_STRUCTURE_ONLY:
1722     /* The option is handled directly by MatSetOption() */
1723     break;
1724   default:
1725     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
/*
   Return one locally owned row of an MPIAIJ matrix with globally numbered,
   sorted column indices.

   The row is the merge of three sorted pieces: the off-diagonal (B) entries
   whose global column is below cstart, then the diagonal (A) entries, then the
   remaining B entries.  The returned idx/v arrays point into per-matrix
   scratch (mat->rowindices / mat->rowvalues) that is sized for the longest
   local row on first use; MatRestoreRow_MPIAIJ() must be called before the
   next MatGetRow().

   row - GLOBAL row number; must lie in [rstart,rend) of this process
   nz  - number of nonzeros in the row
   idx - global column indices (may be NULL if not wanted)
   v   - values (may be NULL if not wanted)
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from A and B only the pieces (columns and/or values) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates B's compressed local columns to global column numbers */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested: locate the split point now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift by cstart to make them global */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1807 
1808 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1809 {
1810   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1811 
1812   PetscFunctionBegin;
1813   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1814   aij->getrowactive = PETSC_FALSE;
1815   PetscFunctionReturn(0);
1816 }
1817 
/*
   Compute a norm of an MPIAIJ matrix.

   Supported: NORM_FROBENIUS (sum of |a_ij|^2 reduced over all processes),
   NORM_1 (max column sum; per-column sums accumulated in a dense array of
   length cmap->N, summed across processes, then maximized) and NORM_INFINITY
   (max row sum, maximized across processes).  The 2-norm is not supported.
   On a single process everything is delegated to the sequential block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the entries of both local blocks, then reduce */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local column j maps to global column cstart+j */
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: compressed column j maps to garray[j] */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* each local row spans entries of both the A and B blocks */
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1884 
/*
   Transpose a parallel AIJ matrix.

   Strategy: (1) for a new (or in-place) result, count nonzeros per column of
   the local blocks -- for the off-diagonal block the counts are mapped to the
   owning processes with a PetscSF reduction -- and preallocate B;
   (2) transpose the diagonal block directly into b->A (all writes are local,
   so no MatSetValues needed); (3) scatter the off-diagonal entries with
   MatSetValues using globally numbered columns taken from a->garray.
   For MAT_INPLACE_MATRIX the freshly built B replaces A via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* *matout == A is the in-place case: a brand-new B must still be built */
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* B has A's sizes swapped: local na x ma, global N x M */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reuse: any entry outside B's existing pattern is a hard error */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed local columns to global columns */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of a->B becomes column `row` of B: set as a 1-column block */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B's, destroying B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1975 
/*
   Scale an MPIAIJ matrix as diag(ll) * mat * diag(rr); either vector may be
   NULL to skip that side.

   The right-scaling of the off-diagonal block needs the ghost entries of rr,
   so the scatter into aij->lvec is started first and its completion is
   overlapped with the left-scaling of B and the scaling of the diagonal
   block A -- do not reorder these calls.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block; its rows are local so ll suffices */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2006 
2007 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2010   PetscErrorCode ierr;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2018 {
2019   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2020   Mat            a,b,c,d;
2021   PetscBool      flg;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   a = matA->A; b = matA->B;
2026   c = matB->A; d = matB->B;
2027 
2028   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2029   if (flg) {
2030     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2031   }
2032   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2037 {
2038   PetscErrorCode ierr;
2039   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2040   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2041 
2042   PetscFunctionBegin;
2043   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2044   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2045     /* because of the column compression in the off-processor part of the matrix a->B,
2046        the number of columns in a->B and b->B may be different, hence we cannot call
2047        the MatCopy() directly on the two parts. If need be, we can provide a more
2048        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2049        then copying the submatrices */
2050     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2051   } else {
2052     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2053     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2054   }
2055   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2060 {
2061   PetscErrorCode ierr;
2062 
2063   PetscFunctionBegin;
2064   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2073 {
2074   PetscInt       i,j,k,nzx,nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i=0; i<m; i++) {
2079     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2080     nzx = xi[i+1] - xi[i];
2081     nzy = yi[i+1] - yi[i];
2082     nnz[i] = 0;
2083     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2084       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k<nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2095 {
2096   PetscErrorCode ierr;
2097   PetscInt       m = Y->rmap->N;
2098   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2099   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
/*
   Y = Y + a*X for MPIAIJ matrices.

   Three cases: identical patterns update both blocks directly; a subset
   pattern uses the generic MatAXPY_Basic(); otherwise a new matrix with the
   union pattern is preallocated (per-row counts from the merge of X's and Y's
   rows), filled, and swapped into Y via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    /* union-pattern counts for the diagonal and off-diagonal blocks */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B takes over Y's header; Y's old data is destroyed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2137 
2138 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2139 
2140 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2141 {
2142 #if defined(PETSC_USE_COMPLEX)
2143   PetscErrorCode ierr;
2144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2148   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2149 #else
2150   PetscFunctionBegin;
2151 #endif
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2156 {
2157   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2162   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2167 {
2168   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2173   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2178 {
2179   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2180   PetscErrorCode    ierr;
2181   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2182   PetscScalar       *va,*vv;
2183   Vec               vB,vA;
2184   const PetscScalar *vb;
2185 
2186   PetscFunctionBegin;
2187   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2188   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2189 
2190   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2191   if (idx) {
2192     for (i=0; i<m; i++) {
2193       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2194     }
2195   }
2196 
2197   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2198   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2199   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2200 
2201   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2202   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2203   for (i=0; i<m; i++) {
2204     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2205       vv[i] = vb[i];
2206       if (idx) idx[i] = a->garray[idxb[i]];
2207     } else {
2208       vv[i] = va[i];
2209       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2210         idx[i] = a->garray[idxb[i]];
2211     }
2212   }
2213   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2214   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2215   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2216   ierr = PetscFree(idxb);CHKERRQ(ierr);
2217   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2218   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
/*
   MatGetRowMinAbs_MPIAIJ - For each local row, places in v the entry of smallest
   absolute value and (optionally) its global column number in idx[].  Implicit
   zeros in the off-diagonal part count: a row whose off-diagonal part is not
   dense has minimum |value| 0.0, attributed to the first "hole" column.
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part: delegate to the diagonal block, writing straight into v */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: every local row is entirely implicit zeros */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; start from the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, so the minimum |value| starts at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares a global column against a local index j — verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* scan the stored entries of this B row (ba/bj advance across rows) */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; a tie picks the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2330 
/*
   MatGetRowMin_MPIAIJ - For each local row, places in v the entry of smallest
   real part and (optionally) its global column number in idx[].  Implicit zeros
   in the off-diagonal part count as 0.0 candidates, attributed to the first
   "hole" column of the row.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part: delegate to the diagonal block, writing straight into v */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: no entries, so report +inf sentinel and idx -1 */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; start from the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, so the row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares a global column against a local index j — verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* scan the stored entries of this B row (ba/bj advance across rows) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; a tie picks the smaller global column */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2439 
/*
   MatGetRowMax_MPIAIJ - For each local row, places in v the entry of largest
   real part and (optionally) its global column number in idx[].  Implicit zeros
   in the off-diagonal part count as 0.0 candidates, attributed to the first
   "hole" column of the row.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;  /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part: delegate to the diagonal block, writing straight into v */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this process owns no columns: no entries, so report -inf sentinel and idx -1 */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; start from the first stored entry */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it contains an implicit 0.0, so the row maximum is at least 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) { /* NOTE(review): compares a global column against a local index j — verify intent */
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* scan the stored entries of this B row (ba/bj advance across rows) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block results; a tie picks the smaller global column */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2548 
2549 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2550 {
2551   PetscErrorCode ierr;
2552   Mat            *dummy;
2553 
2554   PetscFunctionBegin;
2555   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2556   *newmat = *dummy;
2557   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2562 {
2563   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2568   A->factorerrortype = a->A->factorerrortype;
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2573 {
2574   PetscErrorCode ierr;
2575   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2576 
2577   PetscFunctionBegin;
2578   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2579   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2580   if (x->assembled) {
2581     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2582   } else {
2583     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2584   }
2585   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2591 {
2592   PetscFunctionBegin;
2593   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2594   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /*@
2599    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2600 
2601    Collective on Mat
2602 
2603    Input Parameters:
2604 +    A - the matrix
2605 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2606 
2607  Level: advanced
2608 
2609 @*/
2610 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2611 {
2612   PetscErrorCode       ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2620 {
2621   PetscErrorCode       ierr;
2622   PetscBool            sc = PETSC_FALSE,flg;
2623 
2624   PetscFunctionBegin;
2625   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2626   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2627   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2628   if (flg) {
2629     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2630   }
2631   ierr = PetscOptionsTail();CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
/*
   Adds a*I to the parallel matrix Y.  Diagonal entries live only in the
   diagonal block maij->A, so that is where preallocation may be needed.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one (diagonal) entry per row, none off-diagonal */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but the diagonal block holds no entries: re-preallocate it for
       one entry per row while preserving the user's new-nonzero policy (nonew),
       which MatSeqAIJSetPreallocation would otherwise reset */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2652 
2653 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2654 {
2655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2660   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2661   if (d) {
2662     PetscInt rstart;
2663     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2664     *d += rstart;
2665 
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 /* -------------------------------------------------------------------*/
/* Virtual-function dispatch table for MATMPIAIJ.  Entries are positional: each slot
   corresponds to a member of struct _MatOps (the numbered markers below track the
   slot index); NULL slots are operations this type does not implement directly. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2830 
2831 /* ----------------------------------------------------------------------------------------*/
2832 
2833 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2834 {
2835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2840   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2845 {
2846   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2851   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
/*
   Preallocates the diagonal (A) and off-diagonal (B) sequential blocks of an
   MPIAIJ matrix.

   Input Parameters:
+  B     - the MPIAIJ matrix
.  d_nz  - nonzeros per row of the diagonal block (same for all rows), ignored if d_nnz is given
.  d_nnz - per-row nonzero counts for the diagonal block, or NULL
.  o_nz  - nonzeros per row of the off-diagonal block (same for all rows), ignored if o_nnz is given
-  o_nnz - per-row nonzero counts for the off-diagonal block, or NULL

   Any previously built column map, global column array, local work vector and
   scatter context are discarded; the matrix is left preallocated but unassembled.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* Discard assembly artifacts; they are rebuilt on the next assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* On a single process there is no off-diagonal part, so B gets zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* The diagonal block is created only once; later calls just re-preallocate it */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2899 
/*
   Resets an MPIAIJ matrix to its freshly-preallocated state: values inserted
   so far and the structures derived from assembly (column map, global column
   array, local work vector, scatter context) are discarded, while the
   preallocated nonzero capacity of the local blocks is retained.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* Discard assembly artifacts; they are rebuilt on the next assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Reset the sequential diagonal (A) and off-diagonal (B) blocks in place */
  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2927 
/*
   Duplicates an MPIAIJ matrix; cpvalues controls whether numerical values are
   copied. The new matrix references matin's row/column layouts and receives
   deep copies of the colmap, garray, lvec and Mvctx assembly structures when
   they exist on the source (they may be absent if matin is not yet assembled).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* Mirror the source's state flags; insert mode starts fresh */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* Per-call MatGetRow() work space is never shared between matrices */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Layouts are shared by reference, not copied */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* Deep-copy the global-to-local column map (hash table or dense array) */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* Deep-copy the global indices of the off-diagonal block's columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  /* Duplicate the sequential blocks, honoring cpvalues */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2994 
2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2996 {
2997   PetscBool      isbinary, ishdf5;
2998   PetscErrorCode ierr;
2999 
3000   PetscFunctionBegin;
3001   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3002   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3003   /* force binary viewer to load .info file if it has not yet done so */
3004   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3005   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3006   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3007   if (isbinary) {
3008     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3009   } else if (ishdf5) {
3010 #if defined(PETSC_HAVE_HDF5)
3011     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3012 #else
3013     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3014 #endif
3015   } else {
3016     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3017   }
3018   PetscFunctionReturn(0);
3019 }
3020 
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer. The file layout is a
   4-entry header (classid, M, N, nz) followed by per-row nonzero counts, then
   all column indices, then all values. Each rank reads its own row slice
   collectively and stores it via MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt       *rowidxs,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  /* a negative nz marks a special on-disk format this loader does not handle */
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);

  /* read in row lengths and build row indices */
  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  /* prefix-sum converts per-row counts into CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: total nonzeros across ranks must match the header */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  /* store matrix indices and values */
  ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3068 
/* Not scalable because of ISAllGather() unless getting all columns. */
/*
   Gathers the parallel column index set iscol into a sequential IS (*isseq).
   When every process requests exactly its own column ownership range (checked
   collectively), an identity stride IS of the global column count is returned
   instead and the ISAllGather() communication is skipped.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt  start,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* local IS coincides exactly with this rank's column ownership range */
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* the optimization is valid only if every rank qualifies (MPI_MIN of flags) */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    /* preserve iscol's block size on the gathered IS */
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
3106 
3107 /*
3108  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3109  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3110 
3111  Input Parameters:
3112    mat - matrix
   isrow - parallel row index set; its local indices are a subset of the local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3115    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3116            i.e., mat->cstart <= iscol[i] < mat->cend
3117  Output Parameter:
3118    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3119    iscol_o - sequential column index set for retrieving mat->B
3120    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3121  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* isstart = number of iscol entries owned by lower-ranked processes
     (exclusive prefix sum of ncols) */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); /* i temporarily holds the block size */
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; /* localize row indices */
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); /* i temporarily holds the block size */
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* entries still at the -1 padding were not selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 ownership passes to the caller, who must PetscFree() it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3219 
3220 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3221 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3222 {
3223   PetscErrorCode ierr;
3224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3225   Mat            M = NULL;
3226   MPI_Comm       comm;
3227   IS             iscol_d,isrow_d,iscol_o;
3228   Mat            Asub = NULL,Bsub = NULL;
3229   PetscInt       n;
3230 
3231   PetscFunctionBegin;
3232   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3233 
3234   if (call == MAT_REUSE_MATRIX) {
3235     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3236     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3237     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3238 
3239     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3240     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3241 
3242     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3243     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3244 
3245     /* Update diagonal and off-diagonal portions of submat */
3246     asub = (Mat_MPIAIJ*)(*submat)->data;
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3248     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3249     if (n) {
3250       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3251     }
3252     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 
3255   } else { /* call == MAT_INITIAL_MATRIX) */
3256     const PetscInt *garray;
3257     PetscInt        BsubN;
3258 
3259     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3260     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3261 
3262     /* Create local submatrices Asub and Bsub */
3263     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3264     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3265 
3266     /* Create submatrix M */
3267     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3268 
3269     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3270     asub = (Mat_MPIAIJ*)M->data;
3271 
3272     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3273     n = asub->B->cmap->N;
3274     if (BsubN > n) {
3275       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3276       const PetscInt *idx;
3277       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3278       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3279 
3280       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3281       j = 0;
3282       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3283       for (i=0; i<n; i++) {
3284         if (j >= BsubN) break;
3285         while (subgarray[i] > garray[j]) j++;
3286 
3287         if (subgarray[i] == garray[j]) {
3288           idx_new[i] = idx[j++];
3289         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3290       }
3291       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3292 
3293       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3294       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3295 
3296     } else if (BsubN < n) {
3297       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3298     }
3299 
3300     ierr = PetscFree(garray);CHKERRQ(ierr);
3301     *submat = M;
3302 
3303     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3304     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3305     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3306 
3307     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3308     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3309 
3310     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3311     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3317 {
3318   PetscErrorCode ierr;
3319   IS             iscol_local=NULL,isrow_d;
3320   PetscInt       csize;
3321   PetscInt       n,i,j,start,end;
3322   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3323   MPI_Comm       comm;
3324 
3325   PetscFunctionBegin;
3326   /* If isrow has same processor distribution as mat,
3327      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3328   if (call == MAT_REUSE_MATRIX) {
3329     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3330     if (isrow_d) {
3331       sameRowDist  = PETSC_TRUE;
3332       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3333     } else {
3334       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3335       if (iscol_local) {
3336         sameRowDist  = PETSC_TRUE;
3337         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3338       }
3339     }
3340   } else {
3341     /* Check if isrow has same processor distribution as mat */
3342     sameDist[0] = PETSC_FALSE;
3343     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3344     if (!n) {
3345       sameDist[0] = PETSC_TRUE;
3346     } else {
3347       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3348       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3349       if (i >= start && j < end) {
3350         sameDist[0] = PETSC_TRUE;
3351       }
3352     }
3353 
3354     /* Check if iscol has same processor distribution as mat */
3355     sameDist[1] = PETSC_FALSE;
3356     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3357     if (!n) {
3358       sameDist[1] = PETSC_TRUE;
3359     } else {
3360       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3361       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3362       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3363     }
3364 
3365     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3366     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3367     sameRowDist = tsameDist[0];
3368   }
3369 
3370   if (sameRowDist) {
3371     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3372       /* isrow and iscol have same processor distribution as mat */
3373       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3374       PetscFunctionReturn(0);
3375     } else { /* sameRowDist */
3376       /* isrow has same processor distribution as mat */
3377       if (call == MAT_INITIAL_MATRIX) {
3378         PetscBool sorted;
3379         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3380         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3381         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3382         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3383 
3384         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3385         if (sorted) {
3386           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3387           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3388           PetscFunctionReturn(0);
3389         }
3390       } else { /* call == MAT_REUSE_MATRIX */
3391         IS iscol_sub;
3392         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3393         if (iscol_sub) {
3394           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3395           PetscFunctionReturn(0);
3396         }
3397       }
3398     }
3399   }
3400 
3401   /* General case: iscol -> iscol_local which has global size of iscol */
3402   if (call == MAT_REUSE_MATRIX) {
3403     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3404     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3405   } else {
3406     if (!iscol_local) {
3407       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3408     }
3409   }
3410 
3411   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3412   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3413 
3414   if (call == MAT_INITIAL_MATRIX) {
3415     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3416     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3417   }
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 /*@C
3422      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3423          and "off-diagonal" part of the matrix in CSR format.
3424 
3425    Collective
3426 
3427    Input Parameters:
3428 +  comm - MPI communicator
3429 .  A - "diagonal" portion of matrix
3430 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3431 -  garray - global index of B columns
3432 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3436 
3437    Notes:
3438        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3439        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3440 
3441 .seealso: MatCreateMPIAIJWithSplitArrays()
3442 @*/
3443 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3444 {
3445   PetscErrorCode    ierr;
3446   Mat_MPIAIJ        *maij;
3447   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3448   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3449   const PetscScalar *oa;
3450   Mat               Bnew;
3451   PetscInt          m,n,N;
3452 
3453   PetscFunctionBegin;
3454   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3455   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3456   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3457   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3458   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3459   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3460 
3461   /* Get global columns of mat */
3462   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3463 
3464   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3465   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3466   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3467   maij = (Mat_MPIAIJ*)(*mat)->data;
3468 
3469   (*mat)->preallocated = PETSC_TRUE;
3470 
3471   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3472   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3473 
3474   /* Set A as diagonal portion of *mat */
3475   maij->A = A;
3476 
3477   nz = oi[m];
3478   for (i=0; i<nz; i++) {
3479     col   = oj[i];
3480     oj[i] = garray[col];
3481   }
3482 
3483   /* Set Bnew as off-diagonal portion of *mat */
3484   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3485   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3486   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3487   bnew        = (Mat_SeqAIJ*)Bnew->data;
3488   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3489   maij->B     = Bnew;
3490 
3491   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3492 
3493   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3494   b->free_a       = PETSC_FALSE;
3495   b->free_ij      = PETSC_FALSE;
3496   ierr = MatDestroy(&B);CHKERRQ(ierr);
3497 
3498   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3499   bnew->free_a       = PETSC_TRUE;
3500   bnew->free_ij      = PETSC_TRUE;
3501 
3502   /* condense columns of maij->B */
3503   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3504   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3507   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3508   PetscFunctionReturn(0);
3509 }
3510 
3511 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3512 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts the parallel submatrix mat[isrow,iscol],
   keeping the caller-prescribed row distribution.

   Input:
     mat         - the MPIAIJ matrix
     isrow       - rows to extract
     iscol       - columns to extract (parallel IS)
     iscol_local - sequential version of iscol on this process; the non-allcolumns path
                   below requires it to be sorted (duplicates are allowed)
     call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat - the submatrix

   On MAT_INITIAL_MATRIX the intermediate objects (sequential submatrix Msub, condensed
   column IS iscol_sub, and column map iscmap) are composed with *newmat so that a later
   MAT_REUSE_MATRIX call can retrieve and refill them without recomputing the symbolic part.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects composed with *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* Refill the saved sequential submatrix in place */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* all processes must agree, otherwise the collective SingleIS_Local call below would deadlock */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep only the requested columns that this process actually stores:
         diagonal-block columns, plus off-diagonal columns present in garray */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; garray is sorted, so advance k monotonically */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths: walk the CSR of Msub once, classifying each
       column (mapped to the new global numbering via cmap) as diagonal or off-diagonal */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation shared by both length arrays */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  /* walk Msub's CSR row by row, translating local column indices to the new
     global numbering through cmap before insertion */
  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3722 
3723 /*
3724     Not great since it makes two copies of the submatrix: first a SeqAIJ
3725   on each process locally, and then the end result by concatenating the local matrices.
3726   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3727 
3728   Note: This requires a sequential iscol with all indices.
3729 */
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts the parallel submatrix mat[isrow,iscol].

   Input:
     mat   - the MPIAIJ matrix
     isrow - rows to extract
     iscol - sequential IS with ALL requested column indices (hence "nonscalable")
     csize - local column size of the result, or PETSC_DECIDE
     call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat - the submatrix; on MAT_INITIAL_MATRIX the intermediate sequential
              submatrix is composed with *newmat (key "SubMatrix") for later reuse
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all processes must agree on allcolumns before the collective call below */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* refill the sequential submatrix saved by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths: one pass over the CSR of Mreuse */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* single allocation shared by both length arrays */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  /* insert Mreuse row by row; cwork/vwork walk the CSR arrays in place */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3855 
3856 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3857 {
3858   PetscInt       m,cstart, cend,j,nnz,i,d;
3859   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3860   const PetscInt *JJ;
3861   PetscErrorCode ierr;
3862   PetscBool      nooffprocentries;
3863 
3864   PetscFunctionBegin;
3865   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3866 
3867   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3868   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3869   m      = B->rmap->n;
3870   cstart = B->cmap->rstart;
3871   cend   = B->cmap->rend;
3872   rstart = B->rmap->rstart;
3873 
3874   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3875 
3876   if (PetscDefined(USE_DEBUG)) {
3877     for (i=0; i<m; i++) {
3878       nnz = Ii[i+1]- Ii[i];
3879       JJ  = J + Ii[i];
3880       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3881       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3882       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3883     }
3884   }
3885 
3886   for (i=0; i<m; i++) {
3887     nnz     = Ii[i+1]- Ii[i];
3888     JJ      = J + Ii[i];
3889     nnz_max = PetscMax(nnz_max,nnz);
3890     d       = 0;
3891     for (j=0; j<nnz; j++) {
3892       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3893     }
3894     d_nnz[i] = d;
3895     o_nnz[i] = nnz - d;
3896   }
3897   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3898   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3899 
3900   for (i=0; i<m; i++) {
3901     ii   = i + rstart;
3902     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3903   }
3904   nooffprocentries    = B->nooffprocentries;
3905   B->nooffprocentries = PETSC_TRUE;
3906   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3907   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3908   B->nooffprocentries = nooffprocentries;
3909 
3910   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 /*@
3915    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3916    (the default parallel PETSc format).
3917 
3918    Collective
3919 
3920    Input Parameters:
3921 +  B - the matrix
3922 .  i - the indices into j for the start of each local row (starts with zero)
3923 .  j - the column indices for each local row (starts with zero)
3924 -  v - optional values in the matrix
3925 
3926    Level: developer
3927 
3928    Notes:
3929        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3934 
3935        The format which is used for the sparse matrix input, is equivalent to a
3936     row-major ordering.. i.e for the following matrix, the input data expected is
3937     as shown
3938 
3939 $        1 0 0
3940 $        2 0 3     P0
3941 $       -------
3942 $        4 5 6     P1
3943 $
3944 $     Process0 [P0]: rows_owned=[0,1]
3945 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3946 $        j =  {0,0,2}  [size = 3]
3947 $        v =  {1,2,3}  [size = 3]
3948 $
3949 $     Process1 [P1]: rows_owned=[2]
3950 $        i =  {0,3}    [size = nrow+1  = 1+1]
3951 $        j =  {0,1,2}  [size = 3]
3952 $        v =  {4,5,6}  [size = 3]
3953 
3954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3955           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3956 @*/
3957 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3958 {
3959   PetscErrorCode ierr;
3960 
3961   PetscFunctionBegin;
3962   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3963   PetscFunctionReturn(0);
3964 }
3965 
3966 /*@C
3967    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3968    (the default parallel PETSc format).  For good matrix assembly performance
3969    the user should preallocate the matrix storage by setting the parameters
3970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3971    performance can be increased by more than a factor of 50.
3972 
3973    Collective
3974 
3975    Input Parameters:
3976 +  B - the matrix
3977 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3978            (same value is used for all local rows)
3979 .  d_nnz - array containing the number of nonzeros in the various rows of the
3980            DIAGONAL portion of the local submatrix (possibly different for each row)
3981            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3982            The size of this array is equal to the number of local rows, i.e 'm'.
3983            For matrices that will be factored, you must leave room for (and set)
3984            the diagonal entry even if it is zero.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    If the *_nnz parameter is given then the *_nz parameter is ignored
3994 
3995    The AIJ format (also called the Yale sparse matrix format or
3996    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3997    storage.  The stored row and column indices begin with zero.
3998    See Users-Manual: ch_mat for details.
3999 
4000    The parallel matrix is partitioned such that the first m0 rows belong to
4001    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4002    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4003 
4004    The DIAGONAL portion of the local submatrix of a processor can be defined
4005    as the submatrix which is obtained by extraction the part corresponding to
4006    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4007    first row that belongs to the processor, r2 is the last row belonging to
4008    the this processor, and c1-c2 is range of indices of the local part of a
4009    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4010    common case of a square matrix, the row and column ranges are the same and
4011    the DIAGONAL part is also square. The remaining portion of the local
4012    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4013 
4014    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4015 
4016    You can call MatGetInfo() to get information on how effective the preallocation was;
4017    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4018    You can also run with the option -info and look for messages with the string
4019    malloc in them to see if additional memory allocation was needed.
4020 
4021    Example usage:
4022 
4023    Consider the following 8x8 matrix with 34 non-zero values, that is
4024    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4025    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4026    as follows:
4027 
4028 .vb
4029             1  2  0  |  0  3  0  |  0  4
4030     Proc0   0  5  6  |  7  0  0  |  8  0
4031             9  0 10  | 11  0  0  | 12  0
4032     -------------------------------------
4033            13  0 14  | 15 16 17  |  0  0
4034     Proc1   0 18  0  | 19 20 21  |  0  0
4035             0  0  0  | 22 23  0  | 24  0
4036     -------------------------------------
4037     Proc2  25 26 27  |  0  0 28  | 29  0
4038            30  0  0  | 31 32 33  |  0 34
4039 .ve
4040 
4041    This can be represented as a collection of submatrices as:
4042 
4043 .vb
4044       A B C
4045       D E F
4046       G H I
4047 .ve
4048 
4049    Where the submatrices A,B,C are owned by proc0, D,E,F are
4050    owned by proc1, G,H,I are owned by proc2.
4051 
4052    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4053    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4054    The 'M','N' parameters are 8,8, and have the same values on all procs.
4055 
4056    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4057    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4058    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4059    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4060    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4061    matrix, and [DF] as another SeqAIJ matrix.
4062 
4063    When d_nz, o_nz parameters are specified, d_nz storage elements are
4064    allocated for every row of the local diagonal submatrix, and o_nz
4065    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4066    One way to choose d_nz and o_nz is to use the max nonzeros per local
4067    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4068    In this case, the values of d_nz,o_nz are:
4069 .vb
4070      proc0 : dnz = 2, o_nz = 2
4071      proc1 : dnz = 3, o_nz = 2
4072      proc2 : dnz = 1, o_nz = 4
4073 .ve
4074    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4075    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4076    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4077    34 values.
4078 
4079    When d_nnz, o_nnz parameters are specified, the storage is specified
4080    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4081    In the above case the values for d_nnz,o_nnz are:
4082 .vb
4083      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4084      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4085      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4086 .ve
4087    Here the space allocated is sum of all the above values i.e 34, and
4088    hence pre-allocation is perfect.
4089 
4090    Level: intermediate
4091 
4092 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4093           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4094 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; harmless no-op for
     matrix types that do not register "MatMPIAIJSetPreallocation_C" */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4105 
4106 /*@
4107      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4108          CSR format for the local rows.
4109 
4110    Collective
4111 
4112    Input Parameters:
4113 +  comm - MPI communicator
4114 .  m - number of local rows (Cannot be PETSC_DECIDE)
4115 .  n - This value should be the same as the local size used in creating the
4116        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4117        calculated if N is given) For square matrices n is almost always m.
4118 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4119 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4120 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4121 .   j - column indices
4122 -   a - matrix values
4123 
4124    Output Parameter:
4125 .   mat - the matrix
4126 
4127    Level: intermediate
4128 
4129    Notes:
4130        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4131      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4132      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4133 
4134        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4135 
4136        The format which is used for the sparse matrix input, is equivalent to a
4137     row-major ordering.. i.e for the following matrix, the input data expected is
4138     as shown
4139 
4140        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4141 
4142 $        1 0 0
4143 $        2 0 3     P0
4144 $       -------
4145 $        4 5 6     P1
4146 $
4147 $     Process0 [P0]: rows_owned=[0,1]
4148 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4149 $        j =  {0,0,2}  [size = 3]
4150 $        v =  {1,2,3}  [size = 3]
4151 $
4152 $     Process1 [P1]: rows_owned=[2]
4153 $        i =  {0,3}    [size = nrow+1  = 1+1]
4154 $        j =  {0,1,2}  [size = 3]
4155 $        v =  {4,5,6}  [size = 3]
4156 
4157 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4158           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4159 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the CSR row-pointer array must start at 0; a NULL i is tolerated (empty local part) */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies i/j/a into the matrix; caller keeps ownership of the arrays */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4174 
4175 /*@
4176      MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4177          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4178 
4179    Collective
4180 
4181    Input Parameters:
4182 +  mat - the matrix
4183 .  m - number of local rows (Cannot be PETSC_DECIDE)
4184 .  n - This value should be the same as the local size used in creating the
4185        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4186        calculated if N is given) For square matrices n is almost always m.
4187 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4188 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4189 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4190 .  J - column indices
4191 -  v - matrix values
4192 
4193    Level: intermediate
4194 
4195 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4196           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4197 @*/
4198 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4199 {
4200   PetscErrorCode ierr;
4201   PetscInt       cstart,nnz,i,j;
4202   PetscInt       *ld;
4203   PetscBool      nooffprocentries;
4204   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4205   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4206   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4207   const PetscInt *Adi = Ad->i;
4208   PetscInt       ldi,Iii,md;
4209 
4210   PetscFunctionBegin;
4211   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4212   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4213   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4214   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4215 
4216   cstart = mat->cmap->rstart;
4217   if (!Aij->ld) {
4218     /* count number of entries below block diagonal */
4219     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4220     Aij->ld = ld;
4221     for (i=0; i<m; i++) {
4222       nnz  = Ii[i+1]- Ii[i];
4223       j     = 0;
4224       while  (J[j] < cstart && j < nnz) {j++;}
4225       J    += nnz;
4226       ld[i] = j;
4227     }
4228   } else {
4229     ld = Aij->ld;
4230   }
4231 
4232   for (i=0; i<m; i++) {
4233     nnz  = Ii[i+1]- Ii[i];
4234     Iii  = Ii[i];
4235     ldi  = ld[i];
4236     md   = Adi[i+1]-Adi[i];
4237     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4238     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4239     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4240     ad  += md;
4241     ao  += nnz - md;
4242   }
4243   nooffprocentries      = mat->nooffprocentries;
4244   mat->nooffprocentries = PETSC_TRUE;
4245   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4246   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4247   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4248   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4249   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4250   mat->nooffprocentries = nooffprocentries;
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 /*@C
4255    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4256    (the default parallel PETSc format).  For good matrix assembly performance
4257    the user should preallocate the matrix storage by setting the parameters
4258    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4259    performance can be increased by more than a factor of 50.
4260 
4261    Collective
4262 
4263    Input Parameters:
4264 +  comm - MPI communicator
4265 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4266            This value should be the same as the local size used in creating the
4267            y vector for the matrix-vector product y = Ax.
4268 .  n - This value should be the same as the local size used in creating the
4269        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4270        calculated if N is given) For square matrices n is almost always m.
4271 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4272 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4273 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4274            (same value is used for all local rows)
4275 .  d_nnz - array containing the number of nonzeros in the various rows of the
4276            DIAGONAL portion of the local submatrix (possibly different for each row)
4277            or NULL, if d_nz is used to specify the nonzero structure.
4278            The size of this array is equal to the number of local rows, i.e 'm'.
4279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4280            submatrix (same value is used for all local rows).
4281 -  o_nnz - array containing the number of nonzeros in the various rows of the
4282            OFF-DIAGONAL portion of the local submatrix (possibly different for
4283            each row) or NULL, if o_nz is used to specify the nonzero
4284            structure. The size of this array is equal to the number
4285            of local rows, i.e 'm'.
4286 
4287    Output Parameter:
4288 .  A - the matrix
4289 
4290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4293 
4294    Notes:
4295    If the *_nnz parameter is given then the *_nz parameter is ignored
4296 
4297    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4298    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4299    storage requirements for this matrix.
4300 
4301    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4303    that argument.
4304 
4305    The user MUST specify either the local or global matrix dimensions
4306    (possibly both).
4307 
4308    The parallel matrix is partitioned across processors such that the
4309    first m0 rows belong to process 0, the next m1 rows belong to
4310    process 1, the next m2 rows belong to process 2 etc.. where
4311    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4312    values corresponding to [m x N] submatrix.
4313 
4314    The columns are logically partitioned with the n0 columns belonging
4315    to 0th partition, the next n1 columns belonging to the next
4316    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4317 
4318    The DIAGONAL portion of the local submatrix on any given processor
4319    is the submatrix corresponding to the rows and columns m,n
4320    corresponding to the given processor. i.e diagonal matrix on
4321    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4322    etc. The remaining portion of the local submatrix [m x (N-n)]
4323    constitute the OFF-DIAGONAL portion. The example below better
4324    illustrates this concept.
4325 
4326    For a square global matrix we define each processor's diagonal portion
4327    to be its local rows and the corresponding columns (a square submatrix);
4328    each processor's off-diagonal portion encompasses the remainder of the
4329    local matrix (a rectangular submatrix).
4330 
4331    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4332 
4333    When calling this routine with a single process communicator, a matrix of
4334    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4335    type of communicator, use the construction mechanism
4336 .vb
4337      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4338 .ve
4339 
4340 $     MatCreate(...,&A);
4341 $     MatSetType(A,MATMPIAIJ);
4342 $     MatSetSizes(A, m,n,M,N);
4343 $     MatMPIAIJSetPreallocation(A,...);
4344 
4345    By default, this format uses inodes (identical nodes) when possible.
4346    We search for consecutive rows with the same nonzero structure, thereby
4347    reusing matrix information to achieve increased efficiency.
4348 
4349    Options Database Keys:
4350 +  -mat_no_inode  - Do not use inodes
4351 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4352 
4353    Example usage:
4354 
4355    Consider the following 8x8 matrix with 34 non-zero values, that is
4356    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4357    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4358    as follows
4359 
4360 .vb
4361             1  2  0  |  0  3  0  |  0  4
4362     Proc0   0  5  6  |  7  0  0  |  8  0
4363             9  0 10  | 11  0  0  | 12  0
4364     -------------------------------------
4365            13  0 14  | 15 16 17  |  0  0
4366     Proc1   0 18  0  | 19 20 21  |  0  0
4367             0  0  0  | 22 23  0  | 24  0
4368     -------------------------------------
4369     Proc2  25 26 27  |  0  0 28  | 29  0
4370            30  0  0  | 31 32 33  |  0 34
4371 .ve
4372 
4373    This can be represented as a collection of submatrices as
4374 
4375 .vb
4376       A B C
4377       D E F
4378       G H I
4379 .ve
4380 
4381    Where the submatrices A,B,C are owned by proc0, D,E,F are
4382    owned by proc1, G,H,I are owned by proc2.
4383 
4384    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4385    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4386    The 'M','N' parameters are 8,8, and have the same values on all procs.
4387 
4388    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4389    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4390    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4391    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4392    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4394 
4395    When d_nz, o_nz parameters are specified, d_nz storage elements are
4396    allocated for every row of the local diagonal submatrix, and o_nz
4397    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4400    In this case, the values of d_nz,o_nz are
4401 .vb
4402      proc0 : dnz = 2, o_nz = 2
4403      proc1 : dnz = 3, o_nz = 2
4404      proc2 : dnz = 1, o_nz = 4
4405 .ve
4406    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4407    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4409    34 values.
4410 
4411    When d_nnz, o_nnz parameters are specified, the storage is specified
4412    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4413    In the above case the values for d_nnz,o_nnz are
4414 .vb
4415      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4416      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4417      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4418 .ve
4419    Here the space allocated is sum of all the above values i.e 34, and
4420    hence pre-allocation is perfect.
4421 
4422    Level: intermediate
4423 
4424 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4425           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4426 @*/
4427 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4428 {
4429   PetscErrorCode ierr;
4430   PetscMPIInt    size;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4434   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4435   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4436   if (size > 1) {
4437     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4438     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4439   } else {
4440     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4441     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4442   }
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 /*@C
4447   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4448 
4449   Not collective
4450 
4451   Input Parameter:
4452 . A - The MPIAIJ matrix
4453 
4454   Output Parameters:
4455 + Ad - The local diagonal block as a SeqAIJ matrix
4456 . Ao - The local off-diagonal block as a SeqAIJ matrix
4457 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4458 
4459   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4461   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4462   local column numbers to global column numbers in the original matrix.
4463 
4464   Level: intermediate
4465 
4466 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4467 @*/
4468 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4469 {
4470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4471   PetscBool      flg;
4472   PetscErrorCode ierr;
4473 
4474   PetscFunctionBegin;
4475   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4476   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4477   if (Ad)     *Ad     = a->A;
4478   if (Ao)     *Ao     = a->B;
4479   if (colmap) *colmap = a->garray;
4480   PetscFunctionReturn(0);
4481 }
4482 
/* Stacks the per-process sequential matrices inmat (each m x N) by rows into one
   parallel matrix *outmat over comm.  With MAT_INITIAL_MATRIX the parallel matrix
   is created and preallocated first (symbolic phase); the numeric phase then copies
   each local row of inmat into the corresponding global row of *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* inclusive prefix sum of local row counts, minus m, gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatGetRootType_Private(inmat,&rootType);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,rootType);CHKERRQ(ierr);
    /* both preallocation routines are called since the root type may be sequential or
       parallel; the call that does not match the actual type is a no-op */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4537 
/* Writes each process's block of rows of the parallel matrix A (as a full m x N
   sequential matrix) to its own binary file named "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  /* B is a sequential copy of this process's m rows, spanning all N global columns */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    /* local row i of B holds global row i+rstart of A */
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* build "<outfile>.<rank>"; len+6 budgets '.', up to 4 rank digits, and the NUL.
     NOTE(review): PetscSNPrintf bounds the write, so ranks >= 10000 silently
     truncate the suffix rather than overflow -- confirm this is acceptable */
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
  ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4579 
4580 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4581 {
4582   PetscErrorCode      ierr;
4583   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4584 
4585   PetscFunctionBegin;
4586   if (!merge) PetscFunctionReturn(0);
4587   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4588   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4589   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4590   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4591   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4592   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4593   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4594   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4595   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4596   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4597   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4598   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4599   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4600   ierr = PetscFree(merge);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603 
4604 #include <../src/mat/utils/freespace.h>
4605 #include <petscbt.h>
4606 
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the parallel matrix mpimat
   (whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) with the sum
   of the per-process sequential matrices seqmat.  Rows of seqmat owned by other
   ranks are sent to their owners and accumulated there. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the merge context the symbolic phase stashed on mpimat */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* one send per destination rank that owns some of our rows; k counts posted sends */
  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge-scan: bj_i is a superset of aj, both sorted, so advance j and match aj entries */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] holds the contiguous receive storage (allocated by PetscPostIrecvScalar) */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4724 
4725 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4726 {
4727   PetscErrorCode      ierr;
4728   Mat                 B_mpi;
4729   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4730   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4731   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4732   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4733   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4734   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4735   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4736   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4737   MPI_Status          *status;
4738   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4739   PetscBT             lnkbt;
4740   Mat_Merge_SeqsToMPI *merge;
4741   PetscContainer      container;
4742 
4743   PetscFunctionBegin;
4744   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4745 
4746   /* make sure it is a PETSc comm */
4747   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4748   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4749   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4750 
4751   ierr = PetscNew(&merge);CHKERRQ(ierr);
4752   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4753 
4754   /* determine row ownership */
4755   /*---------------------------------------------------------*/
4756   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4757   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4758   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4759   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4760   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4761   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4762   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4763 
4764   m      = merge->rowmap->n;
4765   owners = merge->rowmap->range;
4766 
4767   /* determine the number of messages to send, their lengths */
4768   /*---------------------------------------------------------*/
4769   len_s = merge->len_s;
4770 
4771   len          = 0; /* length of buf_si[] */
4772   merge->nsend = 0;
4773   for (proc=0; proc<size; proc++) {
4774     len_si[proc] = 0;
4775     if (proc == rank) {
4776       len_s[proc] = 0;
4777     } else {
4778       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4779       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4780     }
4781     if (len_s[proc]) {
4782       merge->nsend++;
4783       nrows = 0;
4784       for (i=owners[proc]; i<owners[proc+1]; i++) {
4785         if (ai[i+1] > ai[i]) nrows++;
4786       }
4787       len_si[proc] = 2*(nrows+1);
4788       len         += len_si[proc];
4789     }
4790   }
4791 
4792   /* determine the number and length of messages to receive for ij-structure */
4793   /*-------------------------------------------------------------------------*/
4794   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4795   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4796 
4797   /* post the Irecv of j-structure */
4798   /*-------------------------------*/
4799   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4800   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4801 
4802   /* post the Isend of j-structure */
4803   /*--------------------------------*/
4804   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4805 
4806   for (proc=0, k=0; proc<size; proc++) {
4807     if (!len_s[proc]) continue;
4808     i    = owners[proc];
4809     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4810     k++;
4811   }
4812 
4813   /* receives and sends of j-structure are complete */
4814   /*------------------------------------------------*/
4815   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4816   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4817 
4818   /* send and recv i-structure */
4819   /*---------------------------*/
4820   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4821   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4822 
4823   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4824   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4825   for (proc=0,k=0; proc<size; proc++) {
4826     if (!len_s[proc]) continue;
4827     /* form outgoing message for i-structure:
4828          buf_si[0]:                 nrows to be sent
4829                [1:nrows]:           row index (global)
4830                [nrows+1:2*nrows+1]: i-structure index
4831     */
4832     /*-------------------------------------------*/
4833     nrows       = len_si[proc]/2 - 1;
4834     buf_si_i    = buf_si + nrows+1;
4835     buf_si[0]   = nrows;
4836     buf_si_i[0] = 0;
4837     nrows       = 0;
4838     for (i=owners[proc]; i<owners[proc+1]; i++) {
4839       anzi = ai[i+1] - ai[i];
4840       if (anzi) {
4841         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4842         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4843         nrows++;
4844       }
4845     }
4846     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4847     k++;
4848     buf_si += len_si[proc];
4849   }
4850 
4851   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4852   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4853 
4854   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4855   for (i=0; i<merge->nrecv; i++) {
4856     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4857   }
4858 
4859   ierr = PetscFree(len_si);CHKERRQ(ierr);
4860   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4861   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4862   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4863   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4864   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4865   ierr = PetscFree(status);CHKERRQ(ierr);
4866 
4867   /* compute a local seq matrix in each processor */
4868   /*----------------------------------------------*/
4869   /* allocate bi array and free space for accumulating nonzero column info */
4870   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4871   bi[0] = 0;
4872 
4873   /* create and initialize a linked list */
4874   nlnk = N+1;
4875   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4876 
4877   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4878   len  = ai[owners[rank+1]] - ai[owners[rank]];
4879   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4880 
4881   current_space = free_space;
4882 
4883   /* determine symbolic info for each local row */
4884   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4885 
4886   for (k=0; k<merge->nrecv; k++) {
4887     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4888     nrows       = *buf_ri_k[k];
4889     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4890     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4891   }
4892 
4893   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4894   len  = 0;
4895   for (i=0; i<m; i++) {
4896     bnzi = 0;
4897     /* add local non-zero cols of this proc's seqmat into lnk */
4898     arow  = owners[rank] + i;
4899     anzi  = ai[arow+1] - ai[arow];
4900     aj    = a->j + ai[arow];
4901     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4902     bnzi += nlnk;
4903     /* add received col data into lnk */
4904     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4905       if (i == *nextrow[k]) { /* i-th row */
4906         anzi  = *(nextai[k]+1) - *nextai[k];
4907         aj    = buf_rj[k] + *nextai[k];
4908         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4909         bnzi += nlnk;
4910         nextrow[k]++; nextai[k]++;
4911       }
4912     }
4913     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4914 
4915     /* if free space is not available, make more free space */
4916     if (current_space->local_remaining<bnzi) {
4917       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4918       nspacedouble++;
4919     }
4920     /* copy data into free space, then initialize lnk */
4921     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4922     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4923 
4924     current_space->array           += bnzi;
4925     current_space->local_used      += bnzi;
4926     current_space->local_remaining -= bnzi;
4927 
4928     bi[i+1] = bi[i] + bnzi;
4929   }
4930 
4931   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4932 
4933   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4934   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4935   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4936 
4937   /* create symbolic parallel matrix B_mpi */
4938   /*---------------------------------------*/
4939   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4940   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4941   if (n==PETSC_DECIDE) {
4942     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4943   } else {
4944     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4945   }
4946   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4947   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4948   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4949   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4950   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4951 
4952   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4953   B_mpi->assembled  = PETSC_FALSE;
4954   merge->bi         = bi;
4955   merge->bj         = bj;
4956   merge->buf_ri     = buf_ri;
4957   merge->buf_rj     = buf_rj;
4958   merge->coi        = NULL;
4959   merge->coj        = NULL;
4960   merge->owners_co  = NULL;
4961 
4962   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4963 
4964   /* attach the supporting struct to B_mpi for reuse */
4965   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4966   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4967   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4968   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4969   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4970   *mpimat = B_mpi;
4971 
4972   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4973   PetscFunctionReturn(0);
4974 }
4975 
4976 /*@C
4977       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4978                  matrices from each processor
4979 
4980     Collective
4981 
4982    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4985 .    m - number of local rows (or PETSC_DECIDE)
4986 .    n - number of local columns (or PETSC_DECIDE)
4987 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4988 
4989    Output Parameter:
4990 .    mpimat - the parallel matrix generated
4991 
4992     Level: advanced
4993 
4994    Notes:
4995      The dimensions of the sequential matrix in each processor MUST be the same.
4996      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4997      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4998 @*/
4999 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5000 {
5001   PetscErrorCode ierr;
5002   PetscMPIInt    size;
5003 
5004   PetscFunctionBegin;
5005   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5006   if (size == 1) {
5007     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5008     if (scall == MAT_INITIAL_MATRIX) {
5009       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5010     } else {
5011       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5012     }
5013     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5014     PetscFunctionReturn(0);
5015   }
5016   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5017   if (scall == MAT_INITIAL_MATRIX) {
5018     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5019   }
5020   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5021   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5022   PetscFunctionReturn(0);
5023 }
5024 
5025 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
5029 
5030     Not Collective
5031 
5032    Input Parameters:
5033 +    A - the matrix
5034 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5035 
5036    Output Parameter:
5037 .    A_loc - the local sequential matrix generated
5038 
5039     Level: developer
5040 
5041    Notes:
5042      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5043      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5044      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5045      modify the values of the returned A_loc.
5046 
5047 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5048 @*/
5049 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5050 {
5051   PetscErrorCode    ierr;
5052   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5053   Mat_SeqAIJ        *mat,*a,*b;
5054   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5055   const PetscScalar *aa,*ba,*aav,*bav;
5056   PetscScalar       *ca,*cam;
5057   PetscMPIInt       size;
5058   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5059   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5060   PetscBool         match;
5061 
5062   PetscFunctionBegin;
5063   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5064   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5065   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5066   if (size == 1) {
5067     if (scall == MAT_INITIAL_MATRIX) {
5068       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5069       *A_loc = mpimat->A;
5070     } else if (scall == MAT_REUSE_MATRIX) {
5071       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5072     }
5073     PetscFunctionReturn(0);
5074   }
5075 
5076   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5077   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5078   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5079   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5080   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5081   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5082   aa   = aav;
5083   ba   = bav;
5084   if (scall == MAT_INITIAL_MATRIX) {
5085     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5086     ci[0] = 0;
5087     for (i=0; i<am; i++) {
5088       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5089     }
5090     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5091     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5092     k    = 0;
5093     for (i=0; i<am; i++) {
5094       ncols_o = bi[i+1] - bi[i];
5095       ncols_d = ai[i+1] - ai[i];
5096       /* off-diagonal portion of A */
5097       for (jo=0; jo<ncols_o; jo++) {
5098         col = cmap[*bj];
5099         if (col >= cstart) break;
5100         cj[k]   = col; bj++;
5101         ca[k++] = *ba++;
5102       }
5103       /* diagonal portion of A */
5104       for (j=0; j<ncols_d; j++) {
5105         cj[k]   = cstart + *aj++;
5106         ca[k++] = *aa++;
5107       }
5108       /* off-diagonal portion of A */
5109       for (j=jo; j<ncols_o; j++) {
5110         cj[k]   = cmap[*bj++];
5111         ca[k++] = *ba++;
5112       }
5113     }
5114     /* put together the new matrix */
5115     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5116     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5117     /* Since these are PETSc arrays, change flags to free them as necessary. */
5118     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5119     mat->free_a  = PETSC_TRUE;
5120     mat->free_ij = PETSC_TRUE;
5121     mat->nonew   = 0;
5122   } else if (scall == MAT_REUSE_MATRIX) {
5123     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5124 #if defined(PETSC_USE_DEVICE)
5125     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5126 #endif
5127     ci = mat->i; cj = mat->j; cam = mat->a;
5128     for (i=0; i<am; i++) {
5129       /* off-diagonal portion of A */
5130       ncols_o = bi[i+1] - bi[i];
5131       for (jo=0; jo<ncols_o; jo++) {
5132         col = cmap[*bj];
5133         if (col >= cstart) break;
5134         *cam++ = *ba++; bj++;
5135       }
5136       /* diagonal portion of A */
5137       ncols_d = ai[i+1] - ai[i];
5138       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5139       /* off-diagonal portion of A */
5140       for (j=jo; j<ncols_o; j++) {
5141         *cam++ = *ba++; bj++;
5142       }
5143     }
5144   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5145   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5146   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5147   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 /*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and offdiagonal part
5154 
5155     Not Collective
5156 
5157    Input Parameters:
5158 +    A - the matrix
5159 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5160 
5161    Output Parameters:
5162 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5163 -    A_loc - the local sequential matrix generated
5164 
5165     Level: developer
5166 
5167    Notes:
5168      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5169 
5170 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5171 
5172 @*/
5173 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5174 {
5175   PetscErrorCode ierr;
5176   Mat            Ao,Ad;
5177   const PetscInt *cmap;
5178   PetscMPIInt    size;
5179   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5180 
5181   PetscFunctionBegin;
5182   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5183   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5184   if (size == 1) {
5185     if (scall == MAT_INITIAL_MATRIX) {
5186       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5187       *A_loc = Ad;
5188     } else if (scall == MAT_REUSE_MATRIX) {
5189       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5190     }
5191     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5192     PetscFunctionReturn(0);
5193   }
5194   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5195   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5196   if (f) {
5197     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5198   } else {
5199     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5200     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5201     Mat_SeqAIJ        *c;
5202     PetscInt          *ai = a->i, *aj = a->j;
5203     PetscInt          *bi = b->i, *bj = b->j;
5204     PetscInt          *ci,*cj;
5205     const PetscScalar *aa,*ba;
5206     PetscScalar       *ca;
5207     PetscInt          i,j,am,dn,on;
5208 
5209     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5210     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5211     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5212     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5213     if (scall == MAT_INITIAL_MATRIX) {
5214       PetscInt k;
5215       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5216       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5217       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5218       ci[0] = 0;
5219       for (i=0,k=0; i<am; i++) {
5220         const PetscInt ncols_o = bi[i+1] - bi[i];
5221         const PetscInt ncols_d = ai[i+1] - ai[i];
5222         ci[i+1] = ci[i] + ncols_o + ncols_d;
5223         /* diagonal portion of A */
5224         for (j=0; j<ncols_d; j++,k++) {
5225           cj[k] = *aj++;
5226           ca[k] = *aa++;
5227         }
5228         /* off-diagonal portion of A */
5229         for (j=0; j<ncols_o; j++,k++) {
5230           cj[k] = dn + *bj++;
5231           ca[k] = *ba++;
5232         }
5233       }
5234       /* put together the new matrix */
5235       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5236       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5237       /* Since these are PETSc arrays, change flags to free them as necessary. */
5238       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5239       c->free_a  = PETSC_TRUE;
5240       c->free_ij = PETSC_TRUE;
5241       c->nonew   = 0;
5242       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5243     } else if (scall == MAT_REUSE_MATRIX) {
5244 #if defined(PETSC_HAVE_DEVICE)
5245       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5246 #endif
5247       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5248       ca = c->a;
5249       for (i=0; i<am; i++) {
5250         const PetscInt ncols_d = ai[i+1] - ai[i];
5251         const PetscInt ncols_o = bi[i+1] - bi[i];
5252         /* diagonal portion of A */
5253         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5254         /* off-diagonal portion of A */
5255         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5256       }
5257     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5258     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5259     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5260     if (glob) {
5261       PetscInt cst, *gidx;
5262 
5263       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5264       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5265       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5266       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5267       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5268     }
5269   }
5270   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5271   PetscFunctionReturn(0);
5272 }
5273 
5274 /*@C
5275      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5276 
5277     Not Collective
5278 
5279    Input Parameters:
5280 +    A - the matrix
5281 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5282 -    row, col - index sets of rows and columns to extract (or NULL)
5283 
5284    Output Parameter:
5285 .    A_loc - the local sequential matrix generated
5286 
5287     Level: developer
5288 
5289 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5290 
5291 @*/
5292 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5293 {
5294   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5295   PetscErrorCode ierr;
5296   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5297   IS             isrowa,iscola;
5298   Mat            *aloc;
5299   PetscBool      match;
5300 
5301   PetscFunctionBegin;
5302   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5303   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5304   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5305   if (!row) {
5306     start = A->rmap->rstart; end = A->rmap->rend;
5307     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5308   } else {
5309     isrowa = *row;
5310   }
5311   if (!col) {
5312     start = A->cmap->rstart;
5313     cmap  = a->garray;
5314     nzA   = a->A->cmap->n;
5315     nzB   = a->B->cmap->n;
5316     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5317     ncols = 0;
5318     for (i=0; i<nzB; i++) {
5319       if (cmap[i] < start) idx[ncols++] = cmap[i];
5320       else break;
5321     }
5322     imark = i;
5323     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5324     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5325     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5326   } else {
5327     iscola = *col;
5328   }
5329   if (scall != MAT_INITIAL_MATRIX) {
5330     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5331     aloc[0] = *A_loc;
5332   }
5333   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5334   if (!col) { /* attach global id of condensed columns */
5335     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5336   }
5337   *A_loc = aloc[0];
5338   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5339   if (!row) {
5340     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5341   }
5342   if (!col) {
5343     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5344   }
5345   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5346   PetscFunctionReturn(0);
5347 }
5348 
5349 /*
 * Create a sequential AIJ matrix based on row indices: all columns of a row are extracted once the row is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5352  * on a global size.
5353  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  /* per-row nonzero counts and running offsets, stored as (diag,offdiag) pairs */
  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's columns */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* second pair of SFs: one leaf per nonzero entry to be fetched */
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (shifted in place, undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* restore po->j to local indices after the broadcast of the global ones has completed */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5518 
5519 /*
5520  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5521  * This supports MPIAIJ and MAIJ
5522  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (off-diag columns of A divided by dof) */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense:
     * duplicate keys must be consecutive for mapping[i] = count-1 below to be correct */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count-1;
      }
    }
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    /* gather the unique keys, sort them, and use them as the rows of P to extract */
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case, the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     *  */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5592 
5593 /*@C
5594     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5595 
5596     Collective on Mat
5597 
5598    Input Parameters:
5599 +    A - the first matrix in mpiaij format
5600 .    B - the second matrix in mpiaij format
5601 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5602 
5603    Input/Output Parameters:
5604 +    rowb - index sets of rows of B to extract (or NULL), modified on output
5605 -    colb - index sets of columns of B to extract (or NULL), modified on output
5606 
5607    Output Parameter:
5608 .    B_seq - the sequential matrix generated
5609 
5610     Level: developer
5611 
5612 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires the column layout of A to coincide with the row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the global row indices of B needed locally: the off-diagonal column map
       a->garray merged with this process's own column range [start, start+nzA).
       The break in the first loop relies on garray being sorted ascending. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* isrowb takes ownership of idx (PETSC_OWN_POINTER); every column of B is kept */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* Return the index sets to the caller if requested, otherwise destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5665 
5666 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
5669 
5670     Collective on Mat
5671 
5672    Input Parameters:
5673 +    A,B - the matrices in mpiaij format
5674 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5675 
5676    Output Parameter:
5677 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5678 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5679 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5680 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5681 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5684 
5685     Level: developer
5686 
5687 */
5688 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5689 {
5690   PetscErrorCode         ierr;
5691   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5692   Mat_SeqAIJ             *b_oth;
5693   VecScatter             ctx;
5694   MPI_Comm               comm;
5695   const PetscMPIInt      *rprocs,*sprocs;
5696   const PetscInt         *srow,*rstarts,*sstarts;
5697   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5698   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5699   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5700   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5701   PetscMPIInt            size,tag,rank,nreqs;
5702 
5703   PetscFunctionBegin;
5704   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5705   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5706 
5707   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5708     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5709   }
5710   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5711   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5712 
5713   if (size == 1) {
5714     startsj_s = NULL;
5715     bufa_ptr  = NULL;
5716     *B_oth    = NULL;
5717     PetscFunctionReturn(0);
5718   }
5719 
5720   ctx = a->Mvctx;
5721   tag = ((PetscObject)ctx)->tag;
5722 
5723   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5724   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5725   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5726   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5727   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5728   rwaits = reqs;
5729   swaits = reqs + nrecvs;
5730 
5731   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5732   if (scall == MAT_INITIAL_MATRIX) {
5733     /* i-array */
5734     /*---------*/
5735     /*  post receives */
5736     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5737     for (i=0; i<nrecvs; i++) {
5738       rowlen = rvalues + rstarts[i]*rbs;
5739       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5740       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5741     }
5742 
5743     /* pack the outgoing message */
5744     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5745 
5746     sstartsj[0] = 0;
5747     rstartsj[0] = 0;
5748     len         = 0; /* total length of j or a array to be sent */
5749     if (nsends) {
5750       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5751       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5752     }
5753     for (i=0; i<nsends; i++) {
5754       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5755       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5756       for (j=0; j<nrows; j++) {
5757         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5758         for (l=0; l<sbs; l++) {
5759           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5760 
5761           rowlen[j*sbs+l] = ncols;
5762 
5763           len += ncols;
5764           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5765         }
5766         k++;
5767       }
5768       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5769 
5770       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5771     }
5772     /* recvs and sends of i-array are completed */
5773     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5774     ierr = PetscFree(svalues);CHKERRQ(ierr);
5775 
5776     /* allocate buffers for sending j and a arrays */
5777     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5778     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5779 
5780     /* create i-array of B_oth */
5781     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5782 
5783     b_othi[0] = 0;
5784     len       = 0; /* total length of j or a array to be received */
5785     k         = 0;
5786     for (i=0; i<nrecvs; i++) {
5787       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5788       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5789       for (j=0; j<nrows; j++) {
5790         b_othi[k+1] = b_othi[k] + rowlen[j];
5791         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5792         k++;
5793       }
5794       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5795     }
5796     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5797 
5798     /* allocate space for j and a arrrays of B_oth */
5799     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5800     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5801 
5802     /* j-array */
5803     /*---------*/
5804     /*  post receives of j-array */
5805     for (i=0; i<nrecvs; i++) {
5806       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5807       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5808     }
5809 
5810     /* pack the outgoing message j-array */
5811     if (nsends) k = sstarts[0];
5812     for (i=0; i<nsends; i++) {
5813       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5814       bufJ  = bufj+sstartsj[i];
5815       for (j=0; j<nrows; j++) {
5816         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5817         for (ll=0; ll<sbs; ll++) {
5818           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5819           for (l=0; l<ncols; l++) {
5820             *bufJ++ = cols[l];
5821           }
5822           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5823         }
5824       }
5825       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5826     }
5827 
5828     /* recvs and sends of j-array are completed */
5829     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5830   } else if (scall == MAT_REUSE_MATRIX) {
5831     sstartsj = *startsj_s;
5832     rstartsj = *startsj_r;
5833     bufa     = *bufa_ptr;
5834     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5835     b_otha   = b_oth->a;
5836 #if defined(PETSC_HAVE_DEVICE)
5837     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5838 #endif
5839   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5840 
5841   /* a-array */
5842   /*---------*/
5843   /*  post receives of a-array */
5844   for (i=0; i<nrecvs; i++) {
5845     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5846     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5847   }
5848 
5849   /* pack the outgoing message a-array */
5850   if (nsends) k = sstarts[0];
5851   for (i=0; i<nsends; i++) {
5852     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5853     bufA  = bufa+sstartsj[i];
5854     for (j=0; j<nrows; j++) {
5855       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5856       for (ll=0; ll<sbs; ll++) {
5857         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5858         for (l=0; l<ncols; l++) {
5859           *bufA++ = vals[l];
5860         }
5861         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5862       }
5863     }
5864     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5865   }
5866   /* recvs and sends of a-array are completed */
5867   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5868   ierr = PetscFree(reqs);CHKERRQ(ierr);
5869 
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* put together the new matrix */
5872     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5873 
5874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5875     /* Since these are PETSc arrays, change flags to free them as necessary. */
5876     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5877     b_oth->free_a  = PETSC_TRUE;
5878     b_oth->free_ij = PETSC_TRUE;
5879     b_oth->nonew   = 0;
5880 
5881     ierr = PetscFree(bufj);CHKERRQ(ierr);
5882     if (!startsj_s || !bufa_ptr) {
5883       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5884       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5885     } else {
5886       *startsj_s = sstartsj;
5887       *startsj_r = rstartsj;
5888       *bufa_ptr  = bufa;
5889     }
5890   }
5891 
5892   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5893   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5894   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5895   PetscFunctionReturn(0);
5896 }
5897 
5898 /*@C
5899   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5900 
5901   Not Collective
5902 
5903   Input Parameter:
5904 . A - The matrix in mpiaij format
5905 
5906   Output Parameters:
5907 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5908 . colmap - A map from global column index to local index into lvec
5909 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5910 
5911   Level: developer
5912 
5913 @*/
5914 #if defined(PETSC_USE_CTABLE)
5915 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5916 #else
5917 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5918 #endif
5919 {
5920   Mat_MPIAIJ *a;
5921 
5922   PetscFunctionBegin;
5923   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5924   PetscValidPointer(lvec, 2);
5925   PetscValidPointer(colmap, 3);
5926   PetscValidPointer(multScatter, 4);
5927   a = (Mat_MPIAIJ*) A->data;
5928   if (lvec) *lvec = a->lvec;
5929   if (colmap) *colmap = a->colmap;
5930   if (multScatter) *multScatter = a->Mvctx;
5931   PetscFunctionReturn(0);
5932 }
5933 
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5937 #if defined(PETSC_HAVE_MKL_SPARSE)
5938 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5939 #endif
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5942 #if defined(PETSC_HAVE_ELEMENTAL)
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 #if defined(PETSC_HAVE_SCALAPACK)
5946 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5947 #endif
5948 #if defined(PETSC_HAVE_HYPRE)
5949 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5950 #endif
5951 #if defined(PETSC_HAVE_CUDA)
5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5953 #endif
5954 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5956 #endif
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5958 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5959 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5960 
5961 /*
5962     Computes (B'*A')' since computing B*A directly is untenable
5963 
5964                n                       p                          p
5965         [             ]       [             ]         [                 ]
5966       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5967         [             ]       [             ]         [                 ]
5968 
5969 */
5970 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5971 {
5972   PetscErrorCode ierr;
5973   Mat            At,Bt,Ct;
5974 
5975   PetscFunctionBegin;
5976   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5977   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5978   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5979   ierr = MatDestroy(&At);CHKERRQ(ierr);
5980   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5981   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5982   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5983   PetscFunctionReturn(0);
5984 }
5985 
5986 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5987 {
5988   PetscErrorCode ierr;
5989   PetscBool      cisdense;
5990 
5991   PetscFunctionBegin;
5992   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5993   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5994   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5995   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5996   if (!cisdense) {
5997     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5998   }
5999   ierr = MatSetUp(C);CHKERRQ(ierr);
6000 
6001   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6002   PetscFunctionReturn(0);
6003 }
6004 
6005 /* ----------------------------------------------------------------*/
6006 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6007 {
6008   Mat_Product *product = C->product;
6009   Mat         A = product->A,B=product->B;
6010 
6011   PetscFunctionBegin;
6012   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6013     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6014 
6015   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6016   C->ops->productsymbolic = MatProductSymbolic_AB;
6017   PetscFunctionReturn(0);
6018 }
6019 
6020 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6021 {
6022   PetscErrorCode ierr;
6023   Mat_Product    *product = C->product;
6024 
6025   PetscFunctionBegin;
6026   if (product->type == MATPRODUCT_AB) {
6027     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6028   }
6029   PetscFunctionReturn(0);
6030 }
6031 /* ----------------------------------------------------------------*/
6032 
6033 /*MC
6034    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6035 
6036    Options Database Keys:
6037 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6038 
6039    Level: beginner
6040 
6041    Notes:
6042     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6043     in this case the values associated with the rows and columns one passes in are set to zero
6044     in the matrix
6045 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6048 
6049 .seealso: MatCreateAIJ()
6050 M*/
6051 
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the MPIAIJ
   function table, and registers all composed functions (preallocation, conversions,
   and MatProduct dispatch) on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* allocate the type-specific data and copy in the MPIAIJ operations table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built lazily at assembly */
  b->garray      = NULL; /* global indices of off-diagonal columns, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose the query-able entry points for preallocation, value store/retrieve, etc. */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* register MatConvert() paths; device/third-party targets only when compiled in */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  /* NOTE(review): the transpose-product registration below is guarded by PETSC_HAVE_HYPRE;
     confirm it is intentionally hypre-only rather than misplaced inside this #if block */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6130 
6131 /*@C
6132      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6133          and "off-diagonal" part of the matrix in CSR format.
6134 
6135    Collective
6136 
6137    Input Parameters:
6138 +  comm - MPI communicator
6139 .  m - number of local rows (Cannot be PETSC_DECIDE)
6140 .  n - This value should be the same as the local size used in creating the
6141        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6142        calculated if N is given) For square matrices n is almost always m.
6143 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6144 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6145 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6146 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6147 .   a - matrix values
6148 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6149 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6150 -   oa - matrix values
6151 
6152    Output Parameter:
6153 .   mat - the matrix
6154 
6155    Level: advanced
6156 
6157    Notes:
6158        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6159        must free the arrays once the matrix has been destroyed and not before.
6160 
6161        The i and j indices are 0 based
6162 
6163        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6164 
6165        This sets local rows and cannot be used to set off-processor values.
6166 
6167        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6168        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6169        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6170        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6171        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6172        communication if it is known that only local entries will be set.
6173 
6174 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6175           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6176 @*/
6177 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6178 {
6179   PetscErrorCode ierr;
6180   Mat_MPIAIJ     *maij;
6181 
6182   PetscFunctionBegin;
6183   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6184   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6185   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6186   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6187   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6188   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6189   maij = (Mat_MPIAIJ*) (*mat)->data;
6190 
6191   (*mat)->preallocated = PETSC_TRUE;
6192 
6193   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6194   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6195 
6196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6197   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6198 
6199   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6200   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6201   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6202   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6203   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6204   PetscFunctionReturn(0);
6205 }
6206 
6207 /*
6208     Special version for direct calls from Fortran
6209 */
6210 #include <petsc/private/fortranimpl.h>
6211 
6212 /* Change these macros so can be used in void function */
6213 #undef CHKERRQ
6214 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6215 #undef SETERRQ2
6216 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ3
6218 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6219 #undef SETERRQ
6220 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6221 
6222 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6223 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6224 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6225 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6226 #else
6227 #endif
6228 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6229 {
6230   Mat            mat  = *mmat;
6231   PetscInt       m    = *mm, n = *mn;
6232   InsertMode     addv = *maddv;
6233   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6234   PetscScalar    value;
6235   PetscErrorCode ierr;
6236 
6237   MatCheckPreallocated(mat,1);
6238   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6239   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6240   {
6241     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6242     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6243     PetscBool roworiented = aij->roworiented;
6244 
6245     /* Some Variables required in the macro */
6246     Mat        A                    = aij->A;
6247     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6248     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6249     MatScalar  *aa                  = a->a;
6250     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6251     Mat        B                    = aij->B;
6252     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6253     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6254     MatScalar  *ba                  = b->a;
6255     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6256      * cannot use "#if defined" inside a macro. */
6257     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6258 
6259     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6260     PetscInt  nonew = a->nonew;
6261     MatScalar *ap1,*ap2;
6262 
6263     PetscFunctionBegin;
6264     for (i=0; i<m; i++) {
6265       if (im[i] < 0) continue;
6266       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6267       if (im[i] >= rstart && im[i] < rend) {
6268         row      = im[i] - rstart;
6269         lastcol1 = -1;
6270         rp1      = aj + ai[row];
6271         ap1      = aa + ai[row];
6272         rmax1    = aimax[row];
6273         nrow1    = ailen[row];
6274         low1     = 0;
6275         high1    = nrow1;
6276         lastcol2 = -1;
6277         rp2      = bj + bi[row];
6278         ap2      = ba + bi[row];
6279         rmax2    = bimax[row];
6280         nrow2    = bilen[row];
6281         low2     = 0;
6282         high2    = nrow2;
6283 
6284         for (j=0; j<n; j++) {
6285           if (roworiented) value = v[i*n+j];
6286           else value = v[i+j*m];
6287           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6288           if (in[j] >= cstart && in[j] < cend) {
6289             col = in[j] - cstart;
6290             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6291 #if defined(PETSC_HAVE_DEVICE)
6292             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6293 #endif
6294           } else if (in[j] < 0) continue;
6295           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6296             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6297             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6298           } else {
6299             if (mat->was_assembled) {
6300               if (!aij->colmap) {
6301                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6302               }
6303 #if defined(PETSC_USE_CTABLE)
6304               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6305               col--;
6306 #else
6307               col = aij->colmap[in[j]] - 1;
6308 #endif
6309               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6310                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6311                 col  =  in[j];
6312                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6313                 B        = aij->B;
6314                 b        = (Mat_SeqAIJ*)B->data;
6315                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6316                 rp2      = bj + bi[row];
6317                 ap2      = ba + bi[row];
6318                 rmax2    = bimax[row];
6319                 nrow2    = bilen[row];
6320                 low2     = 0;
6321                 high2    = nrow2;
6322                 bm       = aij->B->rmap->n;
6323                 ba       = b->a;
6324                 inserted = PETSC_FALSE;
6325               }
6326             } else col = in[j];
6327             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6328 #if defined(PETSC_HAVE_DEVICE)
6329             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6330 #endif
6331           }
6332         }
6333       } else if (!aij->donotstash) {
6334         if (roworiented) {
6335           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6336         } else {
6337           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6338         }
6339       }
6340     }
6341   }
6342   PetscFunctionReturnVoid();
6343 }
6344 
/* Product data attached to C->product->data for backend (device-capable) MPIAIJ
   matrix-matrix products; released by MatDestroy_MatMatMPIAIJBACKEND(). The product
   C is assembled from a series of sequential intermediate products whose values are
   scattered into C via COO insertion. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] owns the single shared buffer */
  PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] owns the single shared buffer */
  PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype; /* memory type of the COO buffers (host or device) */

  /* customization */
  PetscBool abmerge;    /* merge the diag/off-diag blocks of product->B for MATPRODUCT_AB */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6375 
6376 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6377 {
6378   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6379   PetscInt            i;
6380   PetscErrorCode      ierr;
6381 
6382   PetscFunctionBegin;
6383   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6384   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6385   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6386   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6387   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6388   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6389   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6390   for (i = 0; i < mmdata->cp; i++) {
6391     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6392   }
6393   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6398   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6399   PetscFunctionReturn(0);
6400 }
6401 
6402 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6403 {
6404   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6405   PetscErrorCode ierr;
6406 
6407   PetscFunctionBegin;
6408   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6409   if (f) {
6410     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6411   } else {
6412     const PetscScalar *vv;
6413 
6414     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6415     if (n && idx) {
6416       PetscScalar    *w = v;
6417       const PetscInt *oi = idx;
6418       PetscInt       j;
6419 
6420       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6421     } else {
6422       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6423     }
6424     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6425   }
6426   PetscFunctionReturn(0);
6427 }
6428 
/*
   MatProductNumeric_MPIAIJBACKEND - Numeric phase of the backend MPIAIJ matrix product.

   Recomputes the intermediate sequential products stored in C->product->data,
   gathers their values into the COO buffers (coo_v for entries inserted locally,
   coo_w for entries destined to other processes), communicates coo_w through the
   PetscSF when needed, and inserts everything into C with MatSetValuesCOO().
*/
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o; /* running offsets into coo_v (local) and coo_w (off-process) */
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
    }
    if (mmdata->Bloc) {
      ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
    }
  }
  /* first numeric call after an api_user symbolic phase may skip the update above; later calls must not */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
  }
  /* collect values of the non-temporary products into the COO buffers, using the
     off[]/own[] index segments laid out by the symbolic phase (csr-like: off[i+1]-off[i]
     is the number of off-process entries of mp[i]) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary product: consumed by a later mp[], not inserted */
    if (noff) { /* this product has entries going to other processes */
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_o += noff;
      n_d += nown;
    } else { /* fully local: copy all nonzeros contiguously */
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values land after the local ones in coo_v */
    ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
  }
  ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6478 
6479 /* Support for Pt * A, A * P, or Pt * A * P */
6480 #define MAX_NUMBER_INTERMEDIATE 4
6481 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6482 {
6483   Mat_Product            *product = C->product;
6484   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6485   Mat_MPIAIJ             *a,*p;
6486   MatMatMPIAIJBACKEND    *mmdata;
6487   ISLocalToGlobalMapping P_oth_l2g = NULL;
6488   IS                     glob = NULL;
6489   const char             *prefix;
6490   char                   pprefix[256];
6491   const PetscInt         *globidx,*P_oth_idx;
6492   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6493   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6494                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6495                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6496   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6497 
6498   MatProductType         ptype;
6499   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6500   PetscMPIInt            size;
6501   PetscErrorCode         ierr;
6502 
6503   PetscFunctionBegin;
6504   MatCheckProduct(C,1);
6505   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6506   ptype = product->type;
6507   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6508     ptype = MATPRODUCT_AB;
6509     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6510   }
6511   switch (ptype) {
6512   case MATPRODUCT_AB:
6513     A = product->A;
6514     P = product->B;
6515     m = A->rmap->n;
6516     n = P->cmap->n;
6517     M = A->rmap->N;
6518     N = P->cmap->N;
6519     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6520     break;
6521   case MATPRODUCT_AtB:
6522     P = product->A;
6523     A = product->B;
6524     m = P->cmap->n;
6525     n = A->cmap->n;
6526     M = P->cmap->N;
6527     N = A->cmap->N;
6528     hasoffproc = PETSC_TRUE;
6529     break;
6530   case MATPRODUCT_PtAP:
6531     A = product->A;
6532     P = product->B;
6533     m = P->cmap->n;
6534     n = P->cmap->n;
6535     M = P->cmap->N;
6536     N = P->cmap->N;
6537     hasoffproc = PETSC_TRUE;
6538     break;
6539   default:
6540     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6541   }
6542   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6543   if (size == 1) hasoffproc = PETSC_FALSE;
6544 
6545   /* defaults */
6546   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6547     mp[i]    = NULL;
6548     mptmp[i] = PETSC_FALSE;
6549     rmapt[i] = -1;
6550     cmapt[i] = -1;
6551     rmapa[i] = NULL;
6552     cmapa[i] = NULL;
6553   }
6554 
6555   /* customization */
6556   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6557   mmdata->reusesym = product->api_user;
6558   if (ptype == MATPRODUCT_AB) {
6559     if (product->api_user) {
6560       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6561       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6562       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6563       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6564     } else {
6565       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6566       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6567       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6568       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6569     }
6570   } else if (ptype == MATPRODUCT_PtAP) {
6571     if (product->api_user) {
6572       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6573       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6574       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6575     } else {
6576       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6577       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6578       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6579     }
6580   }
6581   a = (Mat_MPIAIJ*)A->data;
6582   p = (Mat_MPIAIJ*)P->data;
6583   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6584   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6585   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6586   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6587   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6588 
6589   cp   = 0;
6590   switch (ptype) {
6591   case MATPRODUCT_AB: /* A * P */
6592     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6593 
6594     /* A_diag * P_local (merged or not) */
6595     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6596       /* P is product->B */
6597       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6598       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6599       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6600       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6601       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6602       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6603       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6604       mp[cp]->product->api_user = product->api_user;
6605       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6606       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6607       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6608       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6609       rmapt[cp] = 1;
6610       cmapt[cp] = 2;
6611       cmapa[cp] = globidx;
6612       mptmp[cp] = PETSC_FALSE;
6613       cp++;
6614     } else { /* A_diag * P_diag and A_diag * P_off */
6615       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6616       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6617       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6618       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6619       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6620       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6621       mp[cp]->product->api_user = product->api_user;
6622       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6623       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6624       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6625       rmapt[cp] = 1;
6626       cmapt[cp] = 1;
6627       mptmp[cp] = PETSC_FALSE;
6628       cp++;
6629       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6630       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6631       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6632       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6633       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6634       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6635       mp[cp]->product->api_user = product->api_user;
6636       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6637       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6638       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6639       rmapt[cp] = 1;
6640       cmapt[cp] = 2;
6641       cmapa[cp] = p->garray;
6642       mptmp[cp] = PETSC_FALSE;
6643       cp++;
6644     }
6645 
6646     /* A_off * P_other */
6647     if (mmdata->P_oth) {
6648       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6649       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6650       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6651       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6652       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6653       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6654       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6655       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6656       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6657       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6658       mp[cp]->product->api_user = product->api_user;
6659       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6660       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6661       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6662       rmapt[cp] = 1;
6663       cmapt[cp] = 2;
6664       cmapa[cp] = P_oth_idx;
6665       mptmp[cp] = PETSC_FALSE;
6666       cp++;
6667     }
6668     break;
6669 
6670   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6671     /* A is product->B */
6672     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6673     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6674       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6675       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6676       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6677       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6678       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6679       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6680       mp[cp]->product->api_user = product->api_user;
6681       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6682       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6683       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6684       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6685       rmapt[cp] = 2;
6686       rmapa[cp] = globidx;
6687       cmapt[cp] = 2;
6688       cmapa[cp] = globidx;
6689       mptmp[cp] = PETSC_FALSE;
6690       cp++;
6691     } else {
6692       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6693       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6694       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6695       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6696       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6697       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6698       mp[cp]->product->api_user = product->api_user;
6699       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6700       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6701       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6702       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6703       rmapt[cp] = 1;
6704       cmapt[cp] = 2;
6705       cmapa[cp] = globidx;
6706       mptmp[cp] = PETSC_FALSE;
6707       cp++;
6708       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6709       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6710       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6711       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6712       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6713       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6714       mp[cp]->product->api_user = product->api_user;
6715       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6716       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6717       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6718       rmapt[cp] = 2;
6719       rmapa[cp] = p->garray;
6720       cmapt[cp] = 2;
6721       cmapa[cp] = globidx;
6722       mptmp[cp] = PETSC_FALSE;
6723       cp++;
6724     }
6725     break;
6726   case MATPRODUCT_PtAP:
6727     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6728     /* P is product->B */
6729     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6730     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6731     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6732     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6733     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6734     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6735     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6736     mp[cp]->product->api_user = product->api_user;
6737     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6738     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6739     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6740     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6741     rmapt[cp] = 2;
6742     rmapa[cp] = globidx;
6743     cmapt[cp] = 2;
6744     cmapa[cp] = globidx;
6745     mptmp[cp] = PETSC_FALSE;
6746     cp++;
6747     if (mmdata->P_oth) {
6748       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6749       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6750       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6751       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6752       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6753       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6754       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6755       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6756       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6757       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6758       mp[cp]->product->api_user = product->api_user;
6759       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6760       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6761       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6762       mptmp[cp] = PETSC_TRUE;
6763       cp++;
6764       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6765       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6766       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6767       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6768       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6769       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6770       mp[cp]->product->api_user = product->api_user;
6771       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6772       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6773       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6774       rmapt[cp] = 2;
6775       rmapa[cp] = globidx;
6776       cmapt[cp] = 2;
6777       cmapa[cp] = P_oth_idx;
6778       mptmp[cp] = PETSC_FALSE;
6779       cp++;
6780     }
6781     break;
6782   default:
6783     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6784   }
6785   /* sanity check */
6786   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6787 
6788   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6789   for (i = 0; i < cp; i++) {
6790     mmdata->mp[i]    = mp[i];
6791     mmdata->mptmp[i] = mptmp[i];
6792   }
6793   mmdata->cp = cp;
6794   C->product->data       = mmdata;
6795   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6796   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6797 
6798   /* memory type */
6799   mmdata->mtype = PETSC_MEMTYPE_HOST;
6800   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6801   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6802   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6803   // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6804   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6805 
6806   /* prepare coo coordinates for values insertion */
6807 
6808   /* count total nonzeros of those intermediate seqaij Mats
6809     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6810     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6811     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6812   */
6813   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6814     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6815     if (mptmp[cp]) continue;
6816     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
6817       const PetscInt *rmap = rmapa[cp];
6818       const PetscInt mr = mp[cp]->rmap->n;
6819       const PetscInt rs = C->rmap->rstart;
6820       const PetscInt re = C->rmap->rend;
6821       const PetscInt *ii  = mm->i;
6822       for (i = 0; i < mr; i++) {
6823         const PetscInt gr = rmap[i];
6824         const PetscInt nz = ii[i+1] - ii[i];
6825         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6826         else ncoo_oown += nz; /* this row is local */
6827       }
6828     } else ncoo_d += mm->nz;
6829   }
6830 
6831   /*
6832     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6833 
6834     ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.
6835 
6836     off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].
6837 
6838     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
6839     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
6840     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6841 
6842     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6843     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
6844   */
6845   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6846   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6847 
6848   /* gather (i,j) of nonzeros inserted by remote procs */
6849   if (hasoffproc) {
6850     PetscSF  msf;
6851     PetscInt ncoo2,*coo_i2,*coo_j2;
6852 
6853     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6854     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6855     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6856 
6857     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6858       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6859       PetscInt   *idxoff = mmdata->off[cp];
6860       PetscInt   *idxown = mmdata->own[cp];
6861       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6862         const PetscInt *rmap = rmapa[cp];
6863         const PetscInt *cmap = cmapa[cp];
6864         const PetscInt *ii  = mm->i;
6865         PetscInt       *coi = coo_i + ncoo_o;
6866         PetscInt       *coj = coo_j + ncoo_o;
6867         const PetscInt mr = mp[cp]->rmap->n;
6868         const PetscInt rs = C->rmap->rstart;
6869         const PetscInt re = C->rmap->rend;
6870         const PetscInt cs = C->cmap->rstart;
6871         for (i = 0; i < mr; i++) {
6872           const PetscInt *jj = mm->j + ii[i];
6873           const PetscInt gr  = rmap[i];
6874           const PetscInt nz  = ii[i+1] - ii[i];
6875           if (gr < rs || gr >= re) { /* this is an offproc row */
6876             for (j = ii[i]; j < ii[i+1]; j++) {
6877               *coi++ = gr;
6878               *idxoff++ = j;
6879             }
6880             if (!cmapt[cp]) { /* already global */
6881               for (j = 0; j < nz; j++) *coj++ = jj[j];
6882             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6883               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6884             } else { /* offdiag */
6885               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6886             }
6887             ncoo_o += nz;
6888           } else { /* this is a local row */
6889             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6890           }
6891         }
6892       }
6893       mmdata->off[cp + 1] = idxoff;
6894       mmdata->own[cp + 1] = idxown;
6895     }
6896 
6897     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6898     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6899     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6900     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6901     ncoo = ncoo_d + ncoo_oown + ncoo2;
6902     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6903     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6904     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6905     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6906     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6907     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6908     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6909     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6910     coo_i = coo_i2;
6911     coo_j = coo_j2;
6912   } else { /* no offproc values insertion */
6913     ncoo = ncoo_d;
6914     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6915 
6916     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6917     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6918     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6919   }
6920   mmdata->hasoffproc = hasoffproc;
6921 
6922    /* gather (i,j) of nonzeros inserted locally */
6923   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6924     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6925     PetscInt       *coi = coo_i + ncoo_d;
6926     PetscInt       *coj = coo_j + ncoo_d;
6927     const PetscInt *jj  = mm->j;
6928     const PetscInt *ii  = mm->i;
6929     const PetscInt *cmap = cmapa[cp];
6930     const PetscInt *rmap = rmapa[cp];
6931     const PetscInt mr = mp[cp]->rmap->n;
6932     const PetscInt rs = C->rmap->rstart;
6933     const PetscInt re = C->rmap->rend;
6934     const PetscInt cs = C->cmap->rstart;
6935 
6936     if (mptmp[cp]) continue;
6937     if (rmapt[cp] == 1) { /* consecutive rows */
6938       /* fill coo_i */
6939       for (i = 0; i < mr; i++) {
6940         const PetscInt gr = i + rs;
6941         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6942       }
6943       /* fill coo_j */
6944       if (!cmapt[cp]) { /* type-0, already global */
6945         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6946       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6947         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6948       } else { /* type-2, local to global for sparse columns */
6949         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6950       }
6951       ncoo_d += mm->nz;
6952     } else if (rmapt[cp] == 2) { /* sparse rows */
6953       for (i = 0; i < mr; i++) {
6954         const PetscInt *jj = mm->j + ii[i];
6955         const PetscInt gr  = rmap[i];
6956         const PetscInt nz  = ii[i+1] - ii[i];
6957         if (gr >= rs && gr < re) { /* local rows */
6958           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6959           if (!cmapt[cp]) { /* type-0, already global */
6960             for (j = 0; j < nz; j++) *coj++ = jj[j];
6961           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6962             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6963           } else { /* type-2, local to global for sparse columns */
6964             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6965           }
6966           ncoo_d += nz;
6967         }
6968       }
6969     }
6970   }
6971   if (glob) {
6972     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6973   }
6974   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6975   if (P_oth_l2g) {
6976     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6977   }
6978   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6979   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6980   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6981 
6982   /* preallocate with COO data */
6983   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6984   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6985   PetscFunctionReturn(0);
6986 }
6987 
6988 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6989 {
6990   Mat_Product    *product = mat->product;
6991   PetscErrorCode ierr;
6992 #if defined(PETSC_HAVE_DEVICE)
6993   PetscBool      match = PETSC_FALSE;
6994   PetscBool      usecpu = PETSC_FALSE;
6995 #else
6996   PetscBool      match = PETSC_TRUE;
6997 #endif
6998 
6999   PetscFunctionBegin;
7000   MatCheckProduct(mat,1);
7001 #if defined(PETSC_HAVE_DEVICE)
7002   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7003     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
7004   }
7005   if (match) { /* we can always fallback to the CPU if requested */
7006     switch (product->type) {
7007     case MATPRODUCT_AB:
7008       if (product->api_user) {
7009         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
7010         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7011         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7012       } else {
7013         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
7014         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7015         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7016       }
7017       break;
7018     case MATPRODUCT_AtB:
7019       if (product->api_user) {
7020         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7021         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7022         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7023       } else {
7024         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7025         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7026         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7027       }
7028       break;
7029     case MATPRODUCT_PtAP:
7030       if (product->api_user) {
7031         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7032         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7033         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7034       } else {
7035         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7036         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7037         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7038       }
7039       break;
7040     default:
7041       break;
7042     }
7043     match = (PetscBool)!usecpu;
7044   }
7045 #endif
7046   if (match) {
7047     switch (product->type) {
7048     case MATPRODUCT_AB:
7049     case MATPRODUCT_AtB:
7050     case MATPRODUCT_PtAP:
7051       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7052       break;
7053     default:
7054       break;
7055     }
7056   }
7057   /* fallback to MPIAIJ ops */
7058   if (!mat->ops->productsymbolic) {
7059     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7060   }
7061   PetscFunctionReturn(0);
7062 }
7063