xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a2fddd78f770fa4fc19a8af67e65be331f27d92b) !
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
/*
   Builds an IS (global numbering, on the communicator of M) of the locally owned rows
   that contain at least one stored, numerically nonzero entry in either the diagonal
   (A) or off-diagonal (B) block.  If no process in the communicator has a completely
   zero row, *keptrows is left NULL, which callers treat as "all rows kept".
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia   = a->i;
  ib   = b->i;
  ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
  /* first pass: cnt = number of locally zero rows (no stored entries, or all stored
     entries numerically zero) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* n0rows = total number of zero rows over all processes */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows NULL */
    ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  /* second pass: collect global indices of the locally nonzero rows; the IS takes
     ownership of rows via PETSC_OWN_POINTER */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
/*
   Computes the requested norm of every global column, returning the full result
   (length = global number of columns) replicated in norms[] on every process.
   Diagonal-block entries map to global column A->cmap->rstart + local column;
   off-diagonal-block entries map through garray.
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  /* get/restore the read arrays before touching a_aij->a / b_aij->a directly;
     presumably this forces any device-side values back to the host -- confirm */
  ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* accumulate |a_ij|^2 per column (|a*a| == |a|^2 also for complex scalars) */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  /* combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
230 
/*
   Builds an IS (global numbering) of the locally owned rows that have an entry
   outside the block diagonal: the union of the off-block-diagonal rows of the
   diagonal block A and the nonzero rows of the off-diagonal block B.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* concatenate both (local) index lists, then sort and remove duplicates */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  /* shift to global row numbering; the IS takes ownership of iis */
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
262 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;  /* number of off-diagonal columns present locally */

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    /* keys and values are stored 1-based so that 0 (a table miss) can mean "absent" */
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* 1-based values; a 0 in colmap means the global column is not in garray */
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
290 
/*
   Inserts or adds `value` at local (row,col) of the DIAGONAL block A of an MPIAIJ
   matrix.  This macro deliberately captures many locals of its caller (rp1, ap1,
   low1, high1, nrow1, lastcol1, aimax, ailen, nonew, ignorezeroentries, a, A, am,
   aa, ai, aj, N, t, _i, ierr) and is only usable inside MatSetValues_MPIAIJ-style
   routines.  orow/ocol are the original global indices, used only in error messages.
   Strategy: bounded binary search narrowing [low1,high1), then a linear scan; if the
   column is absent and new nonzeros are allowed, the row is grown (reallocating via
   MatSeqXAIJReallocateAIJ when full) and later entries are shifted up.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure LogFlops will slow dow the code or not */ \
            (void)PetscLogFlops(1.0);   \
           } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
328 
/*
   Companion of MatSetValues_SeqAIJ_A_Private for the OFF-DIAGONAL block B.  Same
   search/insert strategy and the same reliance on caller locals (rp2, ap2, low2,
   high2, nrow2, lastcol2, bimax, bilen, nonew, ignorezeroentries, b, B, bm, ba, bi,
   bj, N, t, _i, ierr).  Note: unlike the A variant, a zero value is skipped without
   the row != col test, since off-diagonal entries are never on the matrix diagonal.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
365 
/*
   Overwrites all stored values of locally owned row `row` (global index) with v[],
   where v[] lists the row's stored entries in ascending global column order.
   The row is split as: entries left of the diagonal block (stored in B), then the
   diagonal block (A), then entries right of the diagonal block (also B).
   Assumes garray (global columns of B) is sorted -- TODO confirm for all callers.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* l = number of B entries with global column < start of the diagonal block */
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* we wrote on the host, so mark the host copy as the valid one */
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
394 
/*
   MatSetValues implementation for MPIAIJ.  Entries in locally owned rows go straight
   into the diagonal (A) or off-diagonal (B) sequential block via the
   MatSetValues_SeqAIJ_{A,B}_Private macros (which use the locals declared below);
   entries for off-process rows are stashed and communicated at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                    = aij->A;
  Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries    = a->ignorezeroentries;
  Mat        B                    = aij->B;
  Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_DEVICE)
  /* get/restore the read arrays before writing a->a / b->a below; presumably this
     copies device-resident values back to the host -- confirm */
  if (A->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
  }
  if (B->offloadmask == PETSC_OFFLOAD_GPU) {
    const PetscScalar *dummy;
    ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
    ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
  }
#endif
  aa = a->a;
  ba = b->a;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          /* off-diagonal block */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* translate the global column to B's local column; colmap is 1-based so
               a result of -1 after the decrement means "not currently present in B" */
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-diagonal location and B may change: disassemble so B uses
                 global column indices again, then insert with the global index */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B        = aij->B;
              b        = (Mat_SeqAIJ*)B->data;
              bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba       = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];  /* before first assembly B stores global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      /* off-process row: stash the values for communication at assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
522 
523 /*
524     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
525     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
526     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
527 */
528 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
529 {
530   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
531   Mat            A           = aij->A; /* diagonal part of the matrix */
532   Mat            B           = aij->B; /* offdiagonal part of the matrix */
533   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
534   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
536   PetscInt       *ailen      = a->ilen,*aj = a->j;
537   PetscInt       *bilen      = b->ilen,*bj = b->j;
538   PetscInt       am          = aij->A->rmap->n,j;
539   PetscInt       diag_so_far = 0,dnz;
540   PetscInt       offd_so_far = 0,onz;
541 
542   PetscFunctionBegin;
543   /* Iterate over all rows of the matrix */
544   for (j=0; j<am; j++) {
545     dnz = onz = 0;
546     /*  Iterate over all non-zero columns of the current row */
547     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
548       /* If column is in the diagonal */
549       if (mat_j[col] >= cstart && mat_j[col] < cend) {
550         aj[diag_so_far++] = mat_j[col] - cstart;
551         dnz++;
552       } else { /* off-diagonal entries */
553         bj[offd_so_far++] = mat_j[col];
554         onz++;
555       }
556     }
557     ailen[j] = dnz;
558     bilen[j] = onz;
559   }
560   PetscFunctionReturn(0);
561 }
562 
563 /*
564     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
565     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
566     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
567     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
568     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
569 */
570 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
571 {
572   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
573   Mat            A      = aij->A; /* diagonal part of the matrix */
574   Mat            B      = aij->B; /* offdiagonal part of the matrix */
575   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
576   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
577   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
578   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
579   PetscInt       *ailen = a->ilen,*aj = a->j;
580   PetscInt       *bilen = b->ilen,*bj = b->j;
581   PetscInt       am     = aij->A->rmap->n,j;
582   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
583   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
584   PetscScalar    *aa = a->a,*ba = b->a;
585 
586   PetscFunctionBegin;
587   /* Iterate over all rows of the matrix */
588   for (j=0; j<am; j++) {
589     dnz_row = onz_row = 0;
590     rowstart_offd = full_offd_i[j];
591     rowstart_diag = full_diag_i[j];
592     /*  Iterate over all non-zero columns of the current row */
593     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
594       /* If column is in the diagonal */
595       if (mat_j[col] >= cstart && mat_j[col] < cend) {
596         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
597         aa[rowstart_diag+dnz_row] = mat_a[col];
598         dnz_row++;
599       } else { /* off-diagonal entries */
600         bj[rowstart_offd+onz_row] = mat_j[col];
601         ba[rowstart_offd+onz_row] = mat_a[col];
602         onz_row++;
603       }
604     }
605     ailen[j] = dnz_row;
606     bilen[j] = onz_row;
607   }
608   PetscFunctionReturn(0);
609 }
610 
/*
   Retrieves values at the (idxm[i], idxn[j]) global positions into v (row-major).
   Only locally owned rows may be queried.  Off-diagonal columns are translated to
   B's local numbering through the colmap (built on demand); positions that are not
   stored in the off-diagonal block return 0.0.  Negative indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* diagonal block: convert to its local column numbering */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap is 1-based; col == -1 after the decrement means "not stored" */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
650 
651 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   PetscErrorCode ierr;
655   PetscInt       nstash,reallocs;
656 
657   PetscFunctionBegin;
658   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
/*
   Completes assembly: receives and applies the stashed off-process entries,
   assembles the diagonal (A) and off-diagonal (B) blocks, collectively disassembles
   if any process introduced new off-diagonal nonzero locations, builds the multiply
   machinery (column map, scatter) on the first final assembly, and updates the
   global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash: each message is a batch of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled flags: other_disassembled is TRUE only if
       every process was still assembled; otherwise someone disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap/garray and the Mult scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row-access workspace and diagonal are stale after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
750 
751 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
752 {
753   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
758   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
/*
   Zeros the (globally indexed) rows given in rows[], optionally placing diag on the
   diagonal, and optionally fixing the right-hand side b so that b = diag*x in those
   rows.  Collective: all ranks must call; the nonzero-state update at the end is a
   global reduction.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;       /* nonzero states of the two blocks before zeroing */
  PetscInt        *lrows;        /* locally owned rows to zero, in local numbering */
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* b[i] = diag*x[i] only makes sense when row and column layouts match */
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  /* remember the block states so we can detect a pattern change afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local (A) block */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;       /* saved 'nonew' flags, restored below */
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;   /* temporarily permit new nonzero locations */
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;   /* temporarily permit new nonzero locations */
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    /* insert the diagonal entries one by one; skipped for rows past the column range */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;  /* restore original insertion policy */
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows in both blocks */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  if (gch) A->nonzerostate++;   /* any rank changed its pattern -> bump global state */
  PetscFunctionReturn(0);
}
836 
/*
   Zeros both the rows AND columns listed in rows[] (global indices), placing diag
   on the diagonal of zeroed rows; optionally fixes b to preserve the solution x.
   Column zeroing of the off-diagonal block is done via a scattered 0/1 mask.
   Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;       /* local row count; later reused as a per-row nz count */
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 = "not requested" sentinel */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* build a 0/1 mask marking zeroed columns, then scatter it to ghost positions */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x in so b can be corrected below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row -> actual local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];             /* note: 'n' reused here as the row's nz count */
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* entry sits in a zeroed column: move its contribution to the rhs, then drop it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
954 
955 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959   PetscInt       nt;
960   VecScatter     Mvctx = a->Mvctx;
961 
962   PetscFunctionBegin;
963   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
964   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
965   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
967   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985   PetscErrorCode ierr;
986   VecScatter     Mvctx = a->Mvctx;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
997 {
998   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
999   PetscErrorCode ierr;
1000 
1001   PetscFunctionBegin;
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   /* do local part */
1005   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1006   /* add partial results together */
1007   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
/*
   Tests whether Bmat equals the transpose of Amat to within tol.  First the
   (cheap) local diagonal blocks are compared with a global AND reduction; only
   if those all agree is the expensive off-diagonal comparison done, which
   extracts complementary submatrices on each rank.  Collective on Amat.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;      /* owned rows vs all non-owned columns */
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;            /* local (per-rank) result of the diagonal test */
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  if (size == 1) PetscFunctionReturn(0);   /* sequential: no off-diagonal part exists */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme[] lists every global column index outside [first,last) */
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me): these are transposes of each other iff B == A^T */
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1053 
1054 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1055 {
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1064 {
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066   PetscErrorCode ierr;
1067 
1068   PetscFunctionBegin;
1069   /* do nondiagonal part */
1070   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1071   /* do local part */
1072   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1073   /* add partial results together */
1074   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*
1080   This only works correctly for square matrices where the subblock A->A is the
1081    diagonal block
1082 */
1083 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1084 {
1085   PetscErrorCode ierr;
1086   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1087 
1088   PetscFunctionBegin;
1089   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1090   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1091   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1102   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1107 {
1108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112 #if defined(PETSC_USE_LOG)
1113   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1114 #endif
1115   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1116   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1117   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1119 #if defined(PETSC_USE_CTABLE)
1120   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1121 #else
1122   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1123 #endif
1124   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1125   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1126   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1127   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1128   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1129   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1130 
1131   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1132   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1133 
1134   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1144 #if defined(PETSC_HAVE_CUDA)
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1146 #endif
1147 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1149 #endif
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154 #if defined(PETSC_HAVE_SCALAPACK)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1160 #endif
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1167 #if defined(PETSC_HAVE_MKL_SPARSE)
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1179   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1180   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa,*ba;
1183   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1184   PetscInt          *rowlens;
1185   PetscInt          *colidxs;
1186   PetscScalar       *matvals;
1187   PetscErrorCode    ierr;
1188 
1189   PetscFunctionBegin;
1190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1191 
1192   M  = mat->rmap->N;
1193   N  = mat->cmap->N;
1194   m  = mat->rmap->n;
1195   rs = mat->rmap->rstart;
1196   cs = mat->cmap->rstart;
1197   nz = A->nz + B->nz;
1198 
1199   /* write matrix header */
1200   header[0] = MAT_FILE_CLASSID;
1201   header[1] = M; header[2] = N; header[3] = nz;
1202   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1203   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1204 
1205   /* fill in and store row lengths  */
1206   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1207   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1208   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1209   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1210 
1211   /* fill in and store column indices */
1212   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1213   for (cnt=0, i=0; i<m; i++) {
1214     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1215       if (garray[B->j[jb]] > cs) break;
1216       colidxs[cnt++] = garray[B->j[jb]];
1217     }
1218     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1219       colidxs[cnt++] = A->j[ja] + cs;
1220     for (; jb<B->i[i+1]; jb++)
1221       colidxs[cnt++] = garray[B->j[jb]];
1222   }
1223   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1224   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1225   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1226 
1227   /* fill in and store nonzero values */
1228   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1229   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1230   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1231   for (cnt=0, i=0; i<m; i++) {
1232     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1233       if (garray[B->j[jb]] > cs) break;
1234       matvals[cnt++] = ba[jb];
1235     }
1236     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1237       matvals[cnt++] = aa[ja];
1238     for (; jb<B->i[i+1]; jb++)
1239       matvals[cnt++] = ba[jb];
1240   }
1241   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1242   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1243   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1244   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1245   ierr = PetscFree(matvals);CHKERRQ(ierr);
1246 
1247   /* write block size option to the viewer's .info file */
1248   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #include <petscdraw.h>
/*
   Viewer workhorse for MPIAIJ: handles the special ASCII formats (load
   balance, info, detailed info) and binary output directly; for everything
   else the whole matrix is gathered onto rank 0 as a submatrix and viewed
   sequentially there.  Collective on mat.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory summary plus the communication pattern */
      MatInfo   info;
      PetscInt *inodes=NULL;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
    /* other ASCII formats fall through to the gather-on-rank-0 path below */
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable — iascii was fully handled by the
       first branch of this if/else chain; kept byte-identical here */
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols, everyone else requests none */
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
/*  The commented code uses MatCreateSubMatrices instead */
/*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
    if (!rank) {
       ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
       A    = AA[0];
       Av   = AA[0];
    }
    ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
*/
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1383 {
1384   PetscErrorCode ierr;
1385   PetscBool      iascii,isdraw,issocket,isbinary;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1392   if (iascii || isdraw || isbinary || issocket) {
1393     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
/*
   MatSOR_MPIAIJ - (S)SOR relaxation for MPIAIJ matrices.

   Only the SOR_LOCAL_* variants (relaxation restricted to each process's
   diagonal block) and the Eisenstat trick are supported; a true sweep across
   process boundaries raises PETSC_ERR_SUP.  Each outer iteration scatters the
   current solution into the ghost vector mat->lvec, folds the off-diagonal
   coupling B*x into a modified right-hand side bb1, and then relaxes the
   local diagonal block mat->A via its own sor() implementation.

   Input parameters mirror MatSOR(): bb is the right-hand side, omega the
   relaxation factor, flag the MatSORType bit flags, fshift a diagonal shift,
   its the number of outer (parallel) iterations, and lits the number of local
   iterations handed to the block solver.  xx is the solution (and the initial
   guess unless SOR_ZERO_INITIAL_GUESS is set).
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;   /* modified RHS bb - B*x, allocated lazily below */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate to the local block (lits outer, 1 local) */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever a non-first iteration uses the current solution:
     more than one iteration, a nonzero initial guess (~flag & bit tests that the
     SOR_ZERO_INITIAL_GUESS bit is NOT set; ~ binds before &), or Eisenstat */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal term vanishes: first sweep uses bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* refresh ghost values of the current solution */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward local sweep from a zero guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily cache the matrix diagonal for the scaling below */
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + (omega-2)/omega * D*x */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add the off-diagonal coupling of the ghosted solution */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  /* propagate any zero-pivot style error detected in the local block solver */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1497 
/*
   MatPermute_MPIAIJ - Forms B = P*A*Q for permutations described by the index
   sets rowp (rows) and colp (columns).

   The row/column permutations are inverted with PetscSF reductions to learn
   where each locally owned row/column lands, preallocation counts are computed
   from the permuted sparsity of the local A (diagonal) and B (off-diagonal)
   blocks, and the values are inserted with MatSetValues() into a fresh MPIAIJ
   matrix.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL; /* NOTE(review): never assigned in this function; the conditional ISDestroy() below is currently dead code */
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  /* work is reused for both row (length m) and column (length n) passes */
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go: broadcast the permuted destinations of
     the globally-numbered ghost columns of the off-diagonal block */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros per (permuted) row for preallocation */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that will own the permuted rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} /* see NOTE(review) at the declaration of parcolp */
  *B = Aperm;
  PetscFunctionReturn(0);
}
1604 
1605 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscErrorCode ierr;
1621   PetscLogDouble isend[5],irecv[5];
1622 
1623   PetscFunctionBegin;
1624   info->block_size = 1.0;
1625   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1626 
1627   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;  isend[4] = info->mallocs;
1629 
1630   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1631 
1632   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;  isend[4] += info->mallocs;
1634   if (flag == MAT_LOCAL) {
1635     info->nz_used      = isend[0];
1636     info->nz_allocated = isend[1];
1637     info->nz_unneeded  = isend[2];
1638     info->memory       = isend[3];
1639     info->mallocs      = isend[4];
1640   } else if (flag == MAT_GLOBAL_MAX) {
1641     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   } else if (flag == MAT_GLOBAL_SUM) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   }
1657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1658   info->fill_ratio_needed = 0;
1659   info->factor_mallocs    = 0;
1660   PetscFunctionReturn(0);
1661 }
1662 
/*
   MatSetOption_MPIAIJ - Sets an option on an MPIAIJ matrix.

   Most options are forwarded to both local blocks (a->A and a->B); a few are
   recorded as flags in the Mat or Mat_MPIAIJ structs, and the symmetry flags
   are handled entirely by the MatSetOption() front end.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that require the blocks to already exist: forward to both */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* also remembered at the parallel level for value insertion */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* presumably suppresses stashing of off-process entries during assembly — confirm against MatSetValues_MPIAIJ */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1714 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
   merging the diagonal-block (A) and off-diagonal-block (B) entries so the
   returned columns appear in increasing global column order.

   Only rows in [rmap->rstart, rmap->rend) may be requested.  The returned
   idx/v arrays point into per-matrix scratch buffers (mat->rowindices /
   mat->rowvalues) sized for the longest local row; MatRestoreRow_MPIAIJ()
   must be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* enforce the MatGetRow/MatRestoreRow pairing */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps local off-diagonal columns to global columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries with global column < cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first ... */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all A entries, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local: shift by cstart to obtain global indices */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1792 
1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1794 {
1795   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1796 
1797   PetscFunctionBegin;
1798   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1799   aij->getrowactive = PETSC_FALSE;
1800   PetscFunctionReturn(0);
1801 }
1802 
/*
   MatNorm_MPIAIJ - Computes a matrix norm of an MPIAIJ matrix.

   Supports NORM_FROBENIUS (sum of |a_ij|^2 reduced over all processes),
   NORM_1 (max column sum, computed by accumulating |a_ij| into a global-width
   array and reducing), and NORM_INFINITY (max row sum, purely local per row
   with a final max reduction).  NORM_2 is not supported.  On a single process
   the request is delegated to the sequential block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the stored entries of both local blocks */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* tmp accumulates |a_ij| per GLOBAL column; +1 guards the N==0 case */
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++; /* A columns are local: shift by cstart */
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;   /* B columns map through garray */
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        /* row j is split between the A and B blocks; sum both pieces */
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1869 
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place, detected as *matout == A) a new matrix
   is created with preallocation derived from column counts of the local blocks
   (off-process counts are obtained with a PetscSF reduction over the ghost
   columns).  The diagonal block is transposed locally without MatSetValues;
   the off-diagonal block is inserted one (transposed) row at a time with
   MatSetValues, which routes the entries to their owning processes.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) { /* *matout == A covers the in-place case */
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* the transpose has swapped sizes and block sizes */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    /* reused matrix must already have the transposed nonzero pattern */
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
  pbv  = bv;
  row  = A->rmap->rstart;
  /* translate compressed local column indices to global column indices */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of a->B as COLUMN 'row' of the transpose (note swapped row/col arguments) */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place (MAT_INPLACE_MATRIX): replace A's contents with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1960 
1961 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1962 {
1963   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1964   Mat            a    = aij->A,b = aij->B;
1965   PetscErrorCode ierr;
1966   PetscInt       s1,s2,s3;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1970   if (rr) {
1971     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1972     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1973     /* Overlap communication with computation. */
1974     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1975   }
1976   if (ll) {
1977     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1978     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1979     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1980   }
1981   /* scale  the diagonal block */
1982   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1983 
1984   if (rr) {
1985     /* Do a scatter end and then right scale the off-diagonal block */
1986     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1993 {
1994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2003 {
2004   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2005   Mat            a,b,c,d;
2006   PetscBool      flg;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   a = matA->A; b = matA->B;
2011   c = matB->A; d = matB->B;
2012 
2013   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2014   if (flg) {
2015     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2016   }
2017   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2022 {
2023   PetscErrorCode ierr;
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2026 
2027   PetscFunctionBegin;
2028   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2029   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2030     /* because of the column compression in the off-processor part of the matrix a->B,
2031        the number of columns in a->B and b->B may be different, hence we cannot call
2032        the MatCopy() directly on the two parts. If need be, we can provide a more
2033        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2034        then copying the submatrices */
2035     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2036   } else {
2037     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2038     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2039   }
2040   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
/*
   MatSetUp_MPIAIJ - Default setup: triggers preallocation with default
   (PETSC_DEFAULT) row lengths when the user has not preallocated explicitly.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DEFAULT nnz with NULL per-row arrays lets the implementation choose its defaults */
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2052 
2053 /*
2054    Computes the number of nonzeros per row needed for preallocation when X and Y
2055    have different nonzero structure.
2056 */
2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2058 {
2059   PetscInt       i,j,k,nzx,nzy;
2060 
2061   PetscFunctionBegin;
2062   /* Set the number of nonzeros in the new matrix */
2063   for (i=0; i<m; i++) {
2064     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2065     nzx = xi[i+1] - xi[i];
2066     nzy = yi[i+1] - yi[i];
2067     nnz[i] = 0;
2068     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2069       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2070       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2071       nnz[i]++;
2072     }
2073     for (; k<nzy; k++) nnz[i]++;
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2080 {
2081   PetscErrorCode ierr;
2082   PetscInt       m = Y->rmap->N;
2083   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2084   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   With SAME_NONZERO_PATTERN the operation is applied block-wise; with
   SUBSET_NONZERO_PATTERN the generic fallback is used; otherwise a new matrix
   with the merged nonzero pattern is preallocated, filled, and swapped into Y
   via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: add block-wise with no new allocation */
    ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
    ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o; /* per-row counts for the diagonal and off-diagonal blocks of the union pattern */

    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    /* build a new matrix with Y's layout/type and the merged X+Y pattern */
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    /* off-diagonal counts need the local-to-global maps since B-block columns are compressed */
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* replace Y's innards with B's while preserving the Y handle callers hold */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
/* Replaces every stored entry of mat with its complex conjugate;
   compiled to a no-op when PETSc is built with real scalars */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* conjugate the diagonal and off-diagonal blocks independently */
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2139 
2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2147   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2158   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2163 {
2164   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2165   PetscErrorCode    ierr;
2166   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2167   PetscScalar       *va,*vv;
2168   Vec               vB,vA;
2169   const PetscScalar *vb;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2173   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2174 
2175   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2176   if (idx) {
2177     for (i=0; i<m; i++) {
2178       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2179     }
2180   }
2181 
2182   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2183   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2184   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2185 
2186   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2187   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2188   for (i=0; i<m; i++) {
2189     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2190       vv[i] = vb[i];
2191       if (idx) idx[i] = a->garray[idxb[i]];
2192     } else {
2193       vv[i] = va[i];
2194       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2195         idx[i] = a->garray[idxb[i]];
2196     }
2197   }
2198   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2199   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2200   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2201   ierr = PetscFree(idxb);CHKERRQ(ierr);
2202   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2203   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
/* Computes v[r] = min_j |A[r,j]| over all columns j -- including columns with no
   stored entry (implicit zeros) -- for each locally owned row r; if idx is non-NULL
   it receives the global column index of a minimizing entry (ties broken toward the
   smaller global column). */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column number -> global column number */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part on this rank: delegate to the sequential diagonal block,
       writing straight into v's array via a wrapper Vec */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: rows here see only implicit zeros */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, capping the off-diagonal min-abs at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column against the local B index j,
         and j against cstart -- verify the hole search against the cmap layout */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block results row by row */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      /* tie: keep the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2315 
/* Computes v[r] = min_j A[r,j] (comparing real parts) over all columns j -- including
   columns with no stored entry (implicit zeros) -- for each locally owned row r; if
   idx is non-NULL it receives the global column index of a minimizing entry (ties
   broken toward the smaller global column). */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column number -> global column number */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part on this rank: delegate to the sequential diagonal block,
       writing straight into v's array via a wrapper Vec */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of min (no entries) */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: at least one implicit 0.0 exists, capping the off-diagonal min at 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column against the local B index j,
         and j against cstart -- verify the hole search against the cmap layout */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller (real-part) value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block results row by row */
  ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: keep the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2424 
/* Computes v[r] = max_j A[r,j] (comparing real parts) over all columns j -- including
   columns with no stored entry (implicit zeros) -- for each locally owned row r; if
   idx is non-NULL it receives the global column index of a maximizing entry (ties
   broken toward the smaller global column). */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;   /* compressed B column number -> global column number */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  PetscErrorCode    ierr;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* no off-diagonal part on this rank: delegate to the sequential diagonal block,
       writing straight into v's array via a wrapper Vec */
    ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
    ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
    ierr = VecDestroy(&diagV);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of max (no entries) */
    if (m) {
      ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);

  /* Get offdiagIdx[] for implicit 0.0 */
  ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): these tests compare the global column against the local B index j,
         and j against cstart -- verify the hole search against the cmap layout */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a larger (real-part) value */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* combine the diagonal-block and off-diagonal-block results row by row */
  ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: keep the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2533 
2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2535 {
2536   PetscErrorCode ierr;
2537   Mat            *dummy;
2538 
2539   PetscFunctionBegin;
2540   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2541   *newmat = *dummy;
2542   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2547 {
2548   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2553   A->factorerrortype = a->A->factorerrortype;
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2558 {
2559   PetscErrorCode ierr;
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2564   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2565   if (x->assembled) {
2566     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2567   } else {
2568     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2569   }
2570   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2571   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2576 {
2577   PetscFunctionBegin;
2578   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2579   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 /*@
2584    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2585 
2586    Collective on Mat
2587 
2588    Input Parameters:
2589 +    A - the matrix
2590 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2591 
   Level: advanced
2593 
2594 @*/
2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2596 {
2597   PetscErrorCode       ierr;
2598 
2599   PetscFunctionBegin;
2600   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2605 {
2606   PetscErrorCode       ierr;
2607   PetscBool            sc = PETSC_FALSE,flg;
2608 
2609   PetscFunctionBegin;
2610   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2611   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2612   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2613   if (flg) {
2614     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2615   }
2616   ierr = PetscOptionsTail();CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
/* Y = Y + a*I. Ensures diagonal entries can be stored before delegating
   to the generic shift. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* not preallocated yet: reserve one entry per row for the diagonal */
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* diagonal block has no stored entries: re-preallocate it with one per row,
       saving and restoring the nonew flag across the re-preallocation */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2637 
2638 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2639 {
2640   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2641   PetscErrorCode ierr;
2642 
2643   PetscFunctionBegin;
2644   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2645   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2646   if (d) {
2647     PetscInt rstart;
2648     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2649     *d += rstart;
2650 
2651   }
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2656 {
2657   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ: each entry corresponds positionally to a slot
   of struct _MatOps (the numeric comments mark the slot index); NULL means the
   operation is not provided by this type. Order must not be changed. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2815 
2816 /* ----------------------------------------------------------------------------------------*/
2817 
2818 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2819 {
2820   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2821   PetscErrorCode ierr;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2825   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2830 {
2831   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2836   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
/* Type-specific implementation of MatMPIAIJSetPreallocation(): (re)creates the
   local diagonal (A) and off-diagonal (B) SeqAIJ blocks and preallocates them
   with d_nz/d_nnz and o_nz/o_nnz respectively. May be called repeatedly; any
   previous communication structures are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* discard any existing global-to-local column map and scatter context;
     they depend on the old off-diagonal block being replaced below */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on a single process there is no off-diagonal part, so give it zero columns */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  /* the diagonal block keeps its sizes across calls; create it only once */
  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2884 
/*
   Resets the preallocation state of an MPIAIJ matrix so it can be refilled,
   keeping the existing per-row nonzero capacities of both sequential blocks.

   Unlike MatMPIAIJSetPreallocation_MPIAIJ(), the blocks b->A and b->B are kept
   and reset in place; only the assembly-time communication structures are discarded.
*/
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  /* Drop the column map, gather array, local vector and scatter; rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2912 
/*
   Duplicates an MPIAIJ matrix.

   Input Parameters:
+  matin    - matrix to duplicate
-  cpvalues - MAT_COPY_VALUES, MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN,
              forwarded to MatDuplicate() of the two sequential blocks

   Output Parameter:
.  newmat - the duplicate
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  /* Create an empty matrix with the same sizes, block sizes and type as matin */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* Copy assembly/factorization state flags */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch arrays are created lazily, not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* Layouts are shared by reference, not copied */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* Deep-copy the global-to-local column map of the off-diagonal block, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;
  /* Deep-copy the array of global column indices of the off-diagonal block, if present */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  }
  if (oldmat->Mvctx) {
    ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  }
  /* Duplicate the sequential diagonal and off-diagonal blocks (values per cpvalues) */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* Carry over composed methods registered on the original matrix */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2979 
2980 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2981 {
2982   PetscBool      isbinary, ishdf5;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2987   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2988   /* force binary viewer to load .info file if it has not yet done so */
2989   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2992   if (isbinary) {
2993     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2994   } else if (ishdf5) {
2995 #if defined(PETSC_HAVE_HDF5)
2996     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2997 #else
2998     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2999 #endif
3000   } else {
3001     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3002   }
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3007 {
3008   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3009   PetscInt       *rowidxs,*colidxs;
3010   PetscScalar    *matvals;
3011   PetscErrorCode ierr;
3012 
3013   PetscFunctionBegin;
3014   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3015 
3016   /* read in matrix header */
3017   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3018   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3019   M  = header[1]; N = header[2]; nz = header[3];
3020   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3021   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3022   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3023 
3024   /* set block sizes from the viewer's .info file */
3025   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3026   /* set global sizes if not set already */
3027   if (mat->rmap->N < 0) mat->rmap->N = M;
3028   if (mat->cmap->N < 0) mat->cmap->N = N;
3029   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3031 
3032   /* check if the matrix sizes are correct */
3033   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3034   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3035 
3036   /* read in row lengths and build row indices */
3037   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3038   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3039   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3040   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3041   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3042   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3043   /* read in column indices and matrix values */
3044   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3045   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3046   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3047   /* store matrix indices and values */
3048   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3049   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3050   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 /* Not scalable because of ISAllGather() unless getting all columns. */
3055 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3056 {
3057   PetscErrorCode ierr;
3058   IS             iscol_local;
3059   PetscBool      isstride;
3060   PetscMPIInt    lisstride=0,gisstride;
3061 
3062   PetscFunctionBegin;
3063   /* check if we are grabbing all columns*/
3064   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3065 
3066   if (isstride) {
3067     PetscInt  start,len,mstart,mlen;
3068     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3069     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3070     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3071     if (mstart == start && mlen-mstart == len) lisstride = 1;
3072   }
3073 
3074   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3075   if (gisstride) {
3076     PetscInt N;
3077     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3078     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3079     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3080     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3081   } else {
3082     PetscInt cbs;
3083     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3084     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3085     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3086   }
3087 
3088   *isseq = iscol_local;
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 /*
3093  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3094  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3095 
3096  Input Parameters:
3097    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3100    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3101            i.e., mat->cstart <= iscol[i] < mat->cend
3102  Output Parameter:
3103    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3104    iscol_o - sequential column index set for retrieving mat->B
3105    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3106  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks selected columns with their global index; cmap records each selected
     column's position within iscol; -1 everywhere else means "not selected" */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of iscol entries on lower-rank processes */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  /* idx ownership transfers to the IS (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  /* shift the (locally owned) global row indices to local numbering */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  /* an entry > -1 in the scattered lvec marks a ghost column that is in iscol */
  count = 0;
  ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* caller owns cmap1 (returned as *garray) and must PetscFree() it */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3204 
3205 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3206 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3207 {
3208   PetscErrorCode ierr;
3209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3210   Mat            M = NULL;
3211   MPI_Comm       comm;
3212   IS             iscol_d,isrow_d,iscol_o;
3213   Mat            Asub = NULL,Bsub = NULL;
3214   PetscInt       n;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3218 
3219   if (call == MAT_REUSE_MATRIX) {
3220     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3221     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3222     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3223 
3224     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3225     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3226 
3227     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3228     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3229 
3230     /* Update diagonal and off-diagonal portions of submat */
3231     asub = (Mat_MPIAIJ*)(*submat)->data;
3232     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3233     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3234     if (n) {
3235       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3236     }
3237     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239 
3240   } else { /* call == MAT_INITIAL_MATRIX) */
3241     const PetscInt *garray;
3242     PetscInt        BsubN;
3243 
3244     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3245     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3246 
3247     /* Create local submatrices Asub and Bsub */
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3249     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3250 
3251     /* Create submatrix M */
3252     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3253 
3254     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3255     asub = (Mat_MPIAIJ*)M->data;
3256 
3257     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3258     n = asub->B->cmap->N;
3259     if (BsubN > n) {
3260       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3261       const PetscInt *idx;
3262       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3263       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3264 
3265       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3266       j = 0;
3267       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3268       for (i=0; i<n; i++) {
3269         if (j >= BsubN) break;
3270         while (subgarray[i] > garray[j]) j++;
3271 
3272         if (subgarray[i] == garray[j]) {
3273           idx_new[i] = idx[j++];
3274         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3275       }
3276       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3277 
3278       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3279       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3280 
3281     } else if (BsubN < n) {
3282       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3283     }
3284 
3285     ierr = PetscFree(garray);CHKERRQ(ierr);
3286     *submat = M;
3287 
3288     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3289     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3290     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3291 
3292     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3293     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3294 
3295     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3296     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3297   }
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3302 {
3303   PetscErrorCode ierr;
3304   IS             iscol_local=NULL,isrow_d;
3305   PetscInt       csize;
3306   PetscInt       n,i,j,start,end;
3307   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3308   MPI_Comm       comm;
3309 
3310   PetscFunctionBegin;
3311   /* If isrow has same processor distribution as mat,
3312      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3313   if (call == MAT_REUSE_MATRIX) {
3314     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3315     if (isrow_d) {
3316       sameRowDist  = PETSC_TRUE;
3317       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3318     } else {
3319       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3320       if (iscol_local) {
3321         sameRowDist  = PETSC_TRUE;
3322         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3323       }
3324     }
3325   } else {
3326     /* Check if isrow has same processor distribution as mat */
3327     sameDist[0] = PETSC_FALSE;
3328     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3329     if (!n) {
3330       sameDist[0] = PETSC_TRUE;
3331     } else {
3332       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3333       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3334       if (i >= start && j < end) {
3335         sameDist[0] = PETSC_TRUE;
3336       }
3337     }
3338 
3339     /* Check if iscol has same processor distribution as mat */
3340     sameDist[1] = PETSC_FALSE;
3341     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3342     if (!n) {
3343       sameDist[1] = PETSC_TRUE;
3344     } else {
3345       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3346       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3347       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3348     }
3349 
3350     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3351     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3352     sameRowDist = tsameDist[0];
3353   }
3354 
3355   if (sameRowDist) {
3356     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3357       /* isrow and iscol have same processor distribution as mat */
3358       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3359       PetscFunctionReturn(0);
3360     } else { /* sameRowDist */
3361       /* isrow has same processor distribution as mat */
3362       if (call == MAT_INITIAL_MATRIX) {
3363         PetscBool sorted;
3364         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3365         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3366         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3367         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3368 
3369         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3370         if (sorted) {
3371           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3372           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3373           PetscFunctionReturn(0);
3374         }
3375       } else { /* call == MAT_REUSE_MATRIX */
3376         IS iscol_sub;
3377         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3378         if (iscol_sub) {
3379           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3380           PetscFunctionReturn(0);
3381         }
3382       }
3383     }
3384   }
3385 
3386   /* General case: iscol -> iscol_local which has global size of iscol */
3387   if (call == MAT_REUSE_MATRIX) {
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3389     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3390   } else {
3391     if (!iscol_local) {
3392       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3393     }
3394   }
3395 
3396   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3397   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3398 
3399   if (call == MAT_INITIAL_MATRIX) {
3400     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3401     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3402   }
3403   PetscFunctionReturn(0);
3404 }
3405 
3406 /*@C
3407      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3408          and "off-diagonal" part of the matrix in CSR format.
3409 
3410    Collective
3411 
3412    Input Parameters:
3413 +  comm - MPI communicator
3414 .  A - "diagonal" portion of matrix
3415 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3416 -  garray - global index of B columns
3417 
3418    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3421 
3422    Notes:
3423        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3424        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3425 
3426 .seealso: MatCreateMPIAIJWithSplitArrays()
3427 @*/
3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3429 {
3430   PetscErrorCode    ierr;
3431   Mat_MPIAIJ        *maij;
3432   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3433   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3434   const PetscScalar *oa;
3435   Mat               Bnew;
3436   PetscInt          m,n,N;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3440   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3441   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3442   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3443   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3444   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3445 
3446   /* Get global columns of mat */
3447   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3448 
3449   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3450   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3451   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3452   maij = (Mat_MPIAIJ*)(*mat)->data;
3453 
3454   (*mat)->preallocated = PETSC_TRUE;
3455 
3456   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3457   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3458 
3459   /* Set A as diagonal portion of *mat */
3460   maij->A = A;
3461 
3462   nz = oi[m];
3463   for (i=0; i<nz; i++) {
3464     col   = oj[i];
3465     oj[i] = garray[col];
3466   }
3467 
3468   /* Set Bnew as off-diagonal portion of *mat */
3469   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3470   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3471   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3472   bnew        = (Mat_SeqAIJ*)Bnew->data;
3473   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3474   maij->B     = Bnew;
3475 
3476   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3477 
3478   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3479   b->free_a       = PETSC_FALSE;
3480   b->free_ij      = PETSC_FALSE;
3481   ierr = MatDestroy(&B);CHKERRQ(ierr);
3482 
3483   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3484   bnew->free_a       = PETSC_TRUE;
3485   bnew->free_ij      = PETSC_TRUE;
3486 
3487   /* condense columns of maij->B */
3488   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3489   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3490   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3491   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3492   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3497 
/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow,iscol] as a parallel matrix when the
   requested rows keep the same process distribution as mat (no rows migrate between processes).

   Input:
     mat         - the MATMPIAIJ matrix to extract from
     isrow       - rows owned by this process to extract
     iscol       - columns to extract (parallel IS)
     iscol_local - iscol gathered onto every process; the implementation below requires it sorted,
                   duplicates allowed (see step (2))
     call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat      - the submatrix. On MAT_INITIAL_MATRIX the intermediate objects are composed on it
                   under the names "SubMatrix", "SubIScol", "Subcmap" and "ISAllGather" so a later
                   MAT_REUSE_MATRIX call can retrieve them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the work objects that the MAT_INITIAL_MATRIX call composed on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    /* refresh the numerical values of the cached sequential submatrix in place */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must be a collective decision: every rank must take the same branch below */
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      /* identity column map: submatrix column i is global column i */
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* keep the requested columns this process actually stores: all diagonal-block columns,
         plus off-diagonal columns that appear in garray (the sorted list of B's global columns) */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            /* advance k through the sorted garray; relies on is_idx being sorted too */
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      /* iscmap maps local submatrix column -> column of the final parallel submatrix */
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible, remainder on the low ranks */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart,rend) column ownership */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the dlens allocation; only dlens is freed */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* cmap[] translates Msub column -> global submatrix column before the ownership test */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj   = aij->j;
  ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column indices to the parallel submatrix's global columns */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* composing then destroying hands ownership of each object to *newmat */
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
3707 
/*
    Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
  matrix on each process, and then the final result obtained by concatenating the
  local matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  Note: This requires a sequential iscol containing all of the requested column indices.
*/
/*
   MatCreateSubMatrix_MPIAIJ_nonscalable - extracts mat[isrow,iscol] as a parallel matrix by first
   building a sequential copy of the requested rows/columns on every process (hence "nonscalable").

   Input:
     mat   - the MATMPIAIJ matrix to extract from
     isrow - rows owned by this process to extract
     iscol - sequential IS with ALL requested column indices (same on every process)
     csize - number of columns of the result owned locally, or PETSC_DECIDE
     call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
   Output:
     newmat - the submatrix; on MAT_INITIAL_MATRIX the sequential copy is composed on it
              as "SubMatrix" so MAT_REUSE_MATRIX can find it
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the decision must be collective so every rank takes the same code path */
  ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential copy stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible, remainder on the low ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to obtain this rank's [rstart,rend) column ownership */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the dlens allocation; only dlens is freed */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork point into Mreuse's CSR; advance the cursors past this row */
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* compose-then-destroy transfers ownership of Mreuse to the result */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3840 
3841 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3842 {
3843   PetscInt       m,cstart, cend,j,nnz,i,d;
3844   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3845   const PetscInt *JJ;
3846   PetscErrorCode ierr;
3847   PetscBool      nooffprocentries;
3848 
3849   PetscFunctionBegin;
3850   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3851 
3852   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3853   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3854   m      = B->rmap->n;
3855   cstart = B->cmap->rstart;
3856   cend   = B->cmap->rend;
3857   rstart = B->rmap->rstart;
3858 
3859   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3860 
3861   if (PetscDefined(USE_DEBUG)) {
3862     for (i=0; i<m; i++) {
3863       nnz = Ii[i+1]- Ii[i];
3864       JJ  = J + Ii[i];
3865       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3866       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
3867       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3868     }
3869   }
3870 
3871   for (i=0; i<m; i++) {
3872     nnz     = Ii[i+1]- Ii[i];
3873     JJ      = J + Ii[i];
3874     nnz_max = PetscMax(nnz_max,nnz);
3875     d       = 0;
3876     for (j=0; j<nnz; j++) {
3877       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3878     }
3879     d_nnz[i] = d;
3880     o_nnz[i] = nnz - d;
3881   }
3882   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3883   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3884 
3885   for (i=0; i<m; i++) {
3886     ii   = i + rstart;
3887     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3888   }
3889   nooffprocentries    = B->nooffprocentries;
3890   B->nooffprocentries = PETSC_TRUE;
3891   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   B->nooffprocentries = nooffprocentries;
3894 
3895   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 /*@
3900    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3901    (the default parallel PETSc format).
3902 
3903    Collective
3904 
3905    Input Parameters:
3906 +  B - the matrix
3907 .  i - the indices into j for the start of each local row (starts with zero)
3908 .  j - the column indices for each local row (starts with zero)
3909 -  v - optional values in the matrix
3910 
3911    Level: developer
3912 
3913    Notes:
3914        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3915      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3916      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3917 
3918        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3919 
3920        The format which is used for the sparse matrix input, is equivalent to a
3921     row-major ordering.. i.e for the following matrix, the input data expected is
3922     as shown
3923 
3924 $        1 0 0
3925 $        2 0 3     P0
3926 $       -------
3927 $        4 5 6     P1
3928 $
3929 $     Process0 [P0]: rows_owned=[0,1]
3930 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3931 $        j =  {0,0,2}  [size = 3]
3932 $        v =  {1,2,3}  [size = 3]
3933 $
3934 $     Process1 [P1]: rows_owned=[2]
3935 $        i =  {0,3}    [size = nrow+1  = 1+1]
3936 $        j =  {0,1,2}  [size = 3]
3937 $        v =  {4,5,6}  [size = 3]
3938 
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3940           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3941 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod() is a no-op if B's type did not compose the method */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3950 
3951 /*@C
3952    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3953    (the default parallel PETSc format).  For good matrix assembly performance
3954    the user should preallocate the matrix storage by setting the parameters
3955    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3956    performance can be increased by more than a factor of 50.
3957 
3958    Collective
3959 
3960    Input Parameters:
3961 +  B - the matrix
3962 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3963            (same value is used for all local rows)
3964 .  d_nnz - array containing the number of nonzeros in the various rows of the
3965            DIAGONAL portion of the local submatrix (possibly different for each row)
3966            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3967            The size of this array is equal to the number of local rows, i.e 'm'.
3968            For matrices that will be factored, you must leave room for (and set)
3969            the diagonal entry even if it is zero.
3970 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3971            submatrix (same value is used for all local rows).
3972 -  o_nnz - array containing the number of nonzeros in the various rows of the
3973            OFF-DIAGONAL portion of the local submatrix (possibly different for
3974            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3975            structure. The size of this array is equal to the number
3976            of local rows, i.e 'm'.
3977 
3978    If the *_nnz parameter is given then the *_nz parameter is ignored
3979 
3980    The AIJ format (also called the Yale sparse matrix format or
3981    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3982    storage.  The stored row and column indices begin with zero.
3983    See Users-Manual: ch_mat for details.
3984 
3985    The parallel matrix is partitioned such that the first m0 rows belong to
3986    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3987    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3988 
3989    The DIAGONAL portion of the local submatrix of a processor can be defined
3990    as the submatrix which is obtained by extraction the part corresponding to
3991    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3992    first row that belongs to the processor, r2 is the last row belonging to
3993    the this processor, and c1-c2 is range of indices of the local part of a
3994    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3995    common case of a square matrix, the row and column ranges are the same and
3996    the DIAGONAL part is also square. The remaining portion of the local
3997    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3998 
3999    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4000 
4001    You can call MatGetInfo() to get information on how effective the preallocation was;
4002    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4003    You can also run with the option -info and look for messages with the string
4004    malloc in them to see if additional memory allocation was needed.
4005 
4006    Example usage:
4007 
4008    Consider the following 8x8 matrix with 34 non-zero values, that is
4009    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4010    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4011    as follows:
4012 
4013 .vb
4014             1  2  0  |  0  3  0  |  0  4
4015     Proc0   0  5  6  |  7  0  0  |  8  0
4016             9  0 10  | 11  0  0  | 12  0
4017     -------------------------------------
4018            13  0 14  | 15 16 17  |  0  0
4019     Proc1   0 18  0  | 19 20 21  |  0  0
4020             0  0  0  | 22 23  0  | 24  0
4021     -------------------------------------
4022     Proc2  25 26 27  |  0  0 28  | 29  0
4023            30  0  0  | 31 32 33  |  0 34
4024 .ve
4025 
4026    This can be represented as a collection of submatrices as:
4027 
4028 .vb
4029       A B C
4030       D E F
4031       G H I
4032 .ve
4033 
4034    Where the submatrices A,B,C are owned by proc0, D,E,F are
4035    owned by proc1, G,H,I are owned by proc2.
4036 
4037    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4038    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4039    The 'M','N' parameters are 8,8, and have the same values on all procs.
4040 
4041    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4042    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4043    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4044    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4047 
4048    When d_nz, o_nz parameters are specified, d_nz storage elements are
4049    allocated for every row of the local diagonal submatrix, and o_nz
4050    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4053    In this case, the values of d_nz,o_nz are:
4054 .vb
4055      proc0 : dnz = 2, o_nz = 2
4056      proc1 : dnz = 3, o_nz = 2
4057      proc2 : dnz = 1, o_nz = 4
4058 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4063 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4066    In the above case the values for d_nnz,o_nnz are:
4067 .vb
4068      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4069      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4070      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4071 .ve
4072    Here the space allocated is sum of all the above values i.e 34, and
4073    hence pre-allocation is perfect.
4074 
4075    Level: intermediate
4076 
4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4078           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4079 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; PetscTryMethod() is a no-op for matrix
     types that do not compose "MatMPIAIJSetPreallocation_C" (e.g. sequential types) */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4090 
4091 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.
4094 
4095    Collective
4096 
4097    Input Parameters:
4098 +  comm - MPI communicator
4099 .  m - number of local rows (Cannot be PETSC_DECIDE)
4100 .  n - This value should be the same as the local size used in creating the
4101        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4102        calculated if N is given) For square matrices n is almost always m.
4103 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4104 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4105 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4106 .   j - column indices
4107 -   a - matrix values
4108 
4109    Output Parameter:
4110 .   mat - the matrix
4111 
4112    Level: intermediate
4113 
4114    Notes:
4115        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4116      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4117      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4118 
4119        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4120 
4121        The format which is used for the sparse matrix input, is equivalent to a
4122     row-major ordering.. i.e for the following matrix, the input data expected is
4123     as shown
4124 
4125        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4126 
4127 $        1 0 0
4128 $        2 0 3     P0
4129 $       -------
4130 $        4 5 6     P1
4131 $
4132 $     Process0 [P0]: rows_owned=[0,1]
4133 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4134 $        j =  {0,0,2}  [size = 3]
4135 $        v =  {1,2,3}  [size = 3]
4136 $
4137 $     Process1 [P1]: rows_owned=[2]
4138 $        i =  {0,3}    [size = nrow+1  = 1+1]
4139 $        j =  {0,1,2}  [size = 3]
4140 $        v =  {4,5,6}  [size = 3]
4141 
4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4143           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4144 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* CSR row pointers must start at zero and the local row count must be explicit */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* copies i, j and a into PETSc's internal storage and assembles the matrix */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4159 
4160 /*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format. Only the numerical values are updated;
         the other arrays must be identical to those used when the matrix was created.
4163 
4164    Collective
4165 
4166    Input Parameters:
4167 +  mat - the matrix
4168 .  m - number of local rows (Cannot be PETSC_DECIDE)
4169 .  n - This value should be the same as the local size used in creating the
4170        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4171        calculated if N is given) For square matrices n is almost always m.
4172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4174 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4175 .  J - column indices
4176 -  v - matrix values
4177 
4178    Level: intermediate
4179 
4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4181           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4182 @*/
4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4184 {
4185   PetscErrorCode ierr;
4186   PetscInt       cstart,nnz,i,j;
4187   PetscInt       *ld;
4188   PetscBool      nooffprocentries;
4189   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4190   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4191   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4192   const PetscInt *Adi = Ad->i;
4193   PetscInt       ldi,Iii,md;
4194 
4195   PetscFunctionBegin;
4196   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4197   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4198   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4199   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4200 
4201   cstart = mat->cmap->rstart;
4202   if (!Aij->ld) {
4203     /* count number of entries below block diagonal */
4204     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4205     Aij->ld = ld;
4206     for (i=0; i<m; i++) {
4207       nnz  = Ii[i+1]- Ii[i];
4208       j     = 0;
4209       while  (J[j] < cstart && j < nnz) {j++;}
4210       J    += nnz;
4211       ld[i] = j;
4212     }
4213   } else {
4214     ld = Aij->ld;
4215   }
4216 
4217   for (i=0; i<m; i++) {
4218     nnz  = Ii[i+1]- Ii[i];
4219     Iii  = Ii[i];
4220     ldi  = ld[i];
4221     md   = Adi[i+1]-Adi[i];
4222     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4223     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4224     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4225     ad  += md;
4226     ao  += nnz - md;
4227   }
4228   nooffprocentries      = mat->nooffprocentries;
4229   mat->nooffprocentries = PETSC_TRUE;
4230   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4231   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4232   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4233   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235   mat->nooffprocentries = nooffprocentries;
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 /*@C
4240    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4241    (the default parallel PETSc format).  For good matrix assembly performance
4242    the user should preallocate the matrix storage by setting the parameters
4243    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4244    performance can be increased by more than a factor of 50.
4245 
4246    Collective
4247 
4248    Input Parameters:
4249 +  comm - MPI communicator
4250 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4251            This value should be the same as the local size used in creating the
4252            y vector for the matrix-vector product y = Ax.
4253 .  n - This value should be the same as the local size used in creating the
4254        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4255        calculated if N is given) For square matrices n is almost always m.
4256 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4257 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4258 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4259            (same value is used for all local rows)
4260 .  d_nnz - array containing the number of nonzeros in the various rows of the
4261            DIAGONAL portion of the local submatrix (possibly different for each row)
4262            or NULL, if d_nz is used to specify the nonzero structure.
4263            The size of this array is equal to the number of local rows, i.e 'm'.
4264 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4265            submatrix (same value is used for all local rows).
4266 -  o_nnz - array containing the number of nonzeros in the various rows of the
4267            OFF-DIAGONAL portion of the local submatrix (possibly different for
4268            each row) or NULL, if o_nz is used to specify the nonzero
4269            structure. The size of this array is equal to the number
4270            of local rows, i.e 'm'.
4271 
4272    Output Parameter:
4273 .  A - the matrix
4274 
4275    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4276    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4277    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4278 
4279    Notes:
4280    If the *_nnz parameter is given then the *_nz parameter is ignored
4281 
4282    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4283    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4284    storage requirements for this matrix.
4285 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4289 
4290    The user MUST specify either the local or global matrix dimensions
4291    (possibly both).
4292 
4293    The parallel matrix is partitioned across processors such that the
4294    first m0 rows belong to process 0, the next m1 rows belong to
4295    process 1, the next m2 rows belong to process 2 etc.. where
4296    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4297    values corresponding to [m x N] submatrix.
4298 
4299    The columns are logically partitioned with the n0 columns belonging
4300    to 0th partition, the next n1 columns belonging to the next
4301    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4302 
4303    The DIAGONAL portion of the local submatrix on any given processor
4304    is the submatrix corresponding to the rows and columns m,n
4305    corresponding to the given processor. i.e diagonal matrix on
4306    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4307    etc. The remaining portion of the local submatrix [m x (N-n)]
4308    constitute the OFF-DIAGONAL portion. The example below better
4309    illustrates this concept.
4310 
4311    For a square global matrix we define each processor's diagonal portion
4312    to be its local rows and the corresponding columns (a square submatrix);
4313    each processor's off-diagonal portion encompasses the remainder of the
4314    local matrix (a rectangular submatrix).
4315 
4316    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4317 
4318    When calling this routine with a single process communicator, a matrix of
4319    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4320    type of communicator, use the construction mechanism
4321 .vb
4322      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4323 .ve
4324 
4325 $     MatCreate(...,&A);
4326 $     MatSetType(A,MATMPIAIJ);
4327 $     MatSetSizes(A, m,n,M,N);
4328 $     MatMPIAIJSetPreallocation(A,...);
4329 
4330    By default, this format uses inodes (identical nodes) when possible.
4331    We search for consecutive rows with the same nonzero structure, thereby
4332    reusing matrix information to achieve increased efficiency.
4333 
4334    Options Database Keys:
4335 +  -mat_no_inode  - Do not use inodes
4336 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4337 
4338    Example usage:
4339 
4340    Consider the following 8x8 matrix with 34 non-zero values, that is
4341    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4342    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4343    as follows
4344 
4345 .vb
4346             1  2  0  |  0  3  0  |  0  4
4347     Proc0   0  5  6  |  7  0  0  |  8  0
4348             9  0 10  | 11  0  0  | 12  0
4349     -------------------------------------
4350            13  0 14  | 15 16 17  |  0  0
4351     Proc1   0 18  0  | 19 20 21  |  0  0
4352             0  0  0  | 22 23  0  | 24  0
4353     -------------------------------------
4354     Proc2  25 26 27  |  0  0 28  | 29  0
4355            30  0  0  | 31 32 33  |  0 34
4356 .ve
4357 
4358    This can be represented as a collection of submatrices as
4359 
4360 .vb
4361       A B C
4362       D E F
4363       G H I
4364 .ve
4365 
4366    Where the submatrices A,B,C are owned by proc0, D,E,F are
4367    owned by proc1, G,H,I are owned by proc2.
4368 
4369    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4370    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4371    The 'M','N' parameters are 8,8, and have the same values on all procs.
4372 
4373    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4374    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4375    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4379 
4380    When d_nz, o_nz parameters are specified, d_nz storage elements are
4381    allocated for every row of the local diagonal submatrix, and o_nz
4382    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4385    In this case, the values of d_nz,o_nz are
4386 .vb
4387      proc0 : dnz = 2, o_nz = 2
4388      proc1 : dnz = 3, o_nz = 2
4389      proc2 : dnz = 1, o_nz = 4
4390 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4395 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4398    In the above case the values for d_nnz,o_nnz are
4399 .vb
4400      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4401      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4402      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4403 .ve
4404    Here the space allocated is sum of all the above values i.e 34, and
4405    hence pre-allocation is perfect.
4406 
4407    Level: intermediate
4408 
4409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4410           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4411 @*/
4412 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4413 {
4414   PetscErrorCode ierr;
4415   PetscMPIInt    size;
4416 
4417   PetscFunctionBegin;
4418   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4419   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4420   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4421   if (size > 1) {
4422     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4423     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4424   } else {
4425     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4426     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4427   }
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 /*@C
4432   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4433 
4434   Not collective
4435 
4436   Input Parameter:
4437 . A - The MPIAIJ matrix
4438 
4439   Output Parameters:
4440 + Ad - The local diagonal block as a SeqAIJ matrix
4441 . Ao - The local off-diagonal block as a SeqAIJ matrix
4442 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4443 
  Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
4448 
4449   Level: intermediate
4450 
4451 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4452 @*/
4453 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4454 {
4455   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4456   PetscBool      flg;
4457   PetscErrorCode ierr;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4461   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4462   if (Ad)     *Ad     = a->A;
4463   if (Ao)     *Ao     = a->B;
4464   if (colmap) *colmap = a->garray;
4465   PetscFunctionReturn(0);
4466 }
4467 
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  /* Stacks the sequential matrices inmat held by the ranks of comm (row-wise,
     in rank order) into one parallel AIJ matrix; each rank owns the rows of its
     own inmat.  n is the local column count (or PETSC_DECIDE).  With
     MAT_INITIAL_MATRIX the matrix is created and preallocated first; otherwise
     only the numeric values of the existing *outmat are refilled. */
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt       *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros of each local row for preallocation */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    /* both preallocations are set; only the one matching the runtime type (Seq or MPI) takes effect */
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* numeric phase: copy the rows of the local inmat into the parallel matrix */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4520 
/* Writes each rank's local rows of the parallel matrix A as a standalone SeqAIJ
   matrix to a separate binary file named "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  /* B is an m x N sequential matrix holding this rank's rows (full row width) */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    /* global row i+rstart of A becomes local row i of B */
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* len+6 leaves room for '.', up to 4 rank digits and the NUL terminator;
     NOTE(review): PetscSNPrintf silently truncates the name for ranks >= 10000 */
  ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
  ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4562 
4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4564 {
4565   PetscErrorCode      ierr;
4566   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4567 
4568   PetscFunctionBegin;
4569   if (!merge) PetscFunctionReturn(0);
4570   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4571   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4572   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4582   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4583   ierr = PetscFree(merge);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #include <../src/mat/utils/freespace.h>
4588 #include <petscbt.h>
4589 
/* Numeric phase of merging per-rank SeqAIJ matrices into one MPIAIJ matrix:
   each rank sends the values of the rows it does not own to the owning rank,
   then every rank accumulates its local values plus the received values into
   the parallel matrix mpimat created by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* retrieve the symbolic-phase support data attached to mpimat */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of all rows owned by [proc] are contiguous in aa starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* bj_i is a superset of aj: walk bj_i, advancing nextaj on each column match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4707 
/* Symbolic phase of merging per-rank SeqAIJ matrices (each holding rows of the
   whole M x N matrix) into one MPIAIJ matrix: determines row ownership,
   exchanges the i/j structure of off-process rows, computes the merged nonzero
   pattern of the locally owned rows, preallocates the parallel matrix, and
   attaches a Mat_Merge_SeqsToMPI container so the numeric phase can reuse the
   communication pattern.  The returned *mpimat is NOT assembled. */
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* rows owned locally need no message */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; the i-structure message stores (nrows, row indices, row offsets) */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* copy the accumulated column indices into the contiguous bj array */
  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled  = PETSC_FALSE;
  merge->bi         = bi;
  merge->bj         = bj;
  merge->buf_ri     = buf_ri;
  merge->buf_rj     = buf_rj;
  merge->coi        = NULL;
  merge->coj        = NULL;
  merge->owners_co  = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4958 
4959 /*@C
4960       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4961                  matrices from each processor
4962 
4963     Collective
4964 
4965    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4968 .    m - number of local rows (or PETSC_DECIDE)
4969 .    n - number of local columns (or PETSC_DECIDE)
4970 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 
4972    Output Parameter:
4973 .    mpimat - the parallel matrix generated
4974 
4975     Level: advanced
4976 
4977    Notes:
4978      The dimensions of the sequential matrix in each processor MUST be the same.
4979      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4980      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4981 @*/
4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4983 {
4984   PetscErrorCode ierr;
4985   PetscMPIInt    size;
4986 
4987   PetscFunctionBegin;
4988   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4989   if (size == 1) {
4990     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4991     if (scall == MAT_INITIAL_MATRIX) {
4992       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4993     } else {
4994       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4995     }
4996     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997     PetscFunctionReturn(0);
4998   }
4999   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000   if (scall == MAT_INITIAL_MATRIX) {
5001     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5002   }
5003   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5004   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
5007 
5008 /*@
5009      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5010           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5011           with MatGetSize()
5012 
5013     Not Collective
5014 
5015    Input Parameters:
5016 +    A - the matrix
5017 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5018 
5019    Output Parameter:
5020 .    A_loc - the local sequential matrix generated
5021 
5022     Level: developer
5023 
5024    Notes:
5025      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5026      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5027      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5028      modify the values of the returned A_loc.
5029 
5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5031 @*/
5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5033 {
5034   PetscErrorCode    ierr;
5035   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5036   Mat_SeqAIJ        *mat,*a,*b;
5037   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5038   const PetscScalar *aa,*ba,*aav,*bav;
5039   PetscScalar       *ca,*cam;
5040   PetscMPIInt       size;
5041   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5042   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5043   PetscBool         match;
5044 
5045   PetscFunctionBegin;
5046   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5047   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5048   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5049   if (size == 1) {
5050     if (scall == MAT_INITIAL_MATRIX) {
5051       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5052       *A_loc = mpimat->A;
5053     } else if (scall == MAT_REUSE_MATRIX) {
5054       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5055     }
5056     PetscFunctionReturn(0);
5057   }
5058 
5059   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5060   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5061   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5062   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5063   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5064   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5065   aa   = aav;
5066   ba   = bav;
5067   if (scall == MAT_INITIAL_MATRIX) {
5068     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5069     ci[0] = 0;
5070     for (i=0; i<am; i++) {
5071       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5072     }
5073     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5074     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5075     k    = 0;
5076     for (i=0; i<am; i++) {
5077       ncols_o = bi[i+1] - bi[i];
5078       ncols_d = ai[i+1] - ai[i];
5079       /* off-diagonal portion of A */
5080       for (jo=0; jo<ncols_o; jo++) {
5081         col = cmap[*bj];
5082         if (col >= cstart) break;
5083         cj[k]   = col; bj++;
5084         ca[k++] = *ba++;
5085       }
5086       /* diagonal portion of A */
5087       for (j=0; j<ncols_d; j++) {
5088         cj[k]   = cstart + *aj++;
5089         ca[k++] = *aa++;
5090       }
5091       /* off-diagonal portion of A */
5092       for (j=jo; j<ncols_o; j++) {
5093         cj[k]   = cmap[*bj++];
5094         ca[k++] = *ba++;
5095       }
5096     }
5097     /* put together the new matrix */
5098     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5099     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5100     /* Since these are PETSc arrays, change flags to free them as necessary. */
5101     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5102     mat->free_a  = PETSC_TRUE;
5103     mat->free_ij = PETSC_TRUE;
5104     mat->nonew   = 0;
5105   } else if (scall == MAT_REUSE_MATRIX) {
5106     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5107 #if defined(PETSC_USE_DEVICE)
5108     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5109 #endif
5110     ci = mat->i; cj = mat->j; cam = mat->a;
5111     for (i=0; i<am; i++) {
5112       /* off-diagonal portion of A */
5113       ncols_o = bi[i+1] - bi[i];
5114       for (jo=0; jo<ncols_o; jo++) {
5115         col = cmap[*bj];
5116         if (col >= cstart) break;
5117         *cam++ = *ba++; bj++;
5118       }
5119       /* diagonal portion of A */
5120       ncols_d = ai[i+1] - ai[i];
5121       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5122       /* off-diagonal portion of A */
5123       for (j=jo; j<ncols_o; j++) {
5124         *cam++ = *ba++; bj++;
5125       }
5126     }
5127   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5128   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5129   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5130   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 
5134 /*@
5135      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5136           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5137 
5138     Not Collective
5139 
5140    Input Parameters:
5141 +    A - the matrix
5142 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5143 
   Output Parameters:
5145 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5146 -    A_loc - the local sequential matrix generated
5147 
5148     Level: developer
5149 
5150    Notes:
5151      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5152 
5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5154 
5155 @*/
5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5157 {
5158   PetscErrorCode ierr;
5159   Mat            Ao,Ad;
5160   const PetscInt *cmap;
5161   PetscMPIInt    size;
5162   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5163 
5164   PetscFunctionBegin;
5165   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5167   if (size == 1) {
5168     if (scall == MAT_INITIAL_MATRIX) {
5169       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5170       *A_loc = Ad;
5171     } else if (scall == MAT_REUSE_MATRIX) {
5172       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5173     }
5174     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5175     PetscFunctionReturn(0);
5176   }
5177   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5178   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5179   if (f) {
5180     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5181   } else {
5182     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5183     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5184     Mat_SeqAIJ        *c;
5185     PetscInt          *ai = a->i, *aj = a->j;
5186     PetscInt          *bi = b->i, *bj = b->j;
5187     PetscInt          *ci,*cj;
5188     const PetscScalar *aa,*ba;
5189     PetscScalar       *ca;
5190     PetscInt          i,j,am,dn,on;
5191 
5192     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5193     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5194     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5195     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       PetscInt k;
5198       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5199       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5200       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5201       ci[0] = 0;
5202       for (i=0,k=0; i<am; i++) {
5203         const PetscInt ncols_o = bi[i+1] - bi[i];
5204         const PetscInt ncols_d = ai[i+1] - ai[i];
5205         ci[i+1] = ci[i] + ncols_o + ncols_d;
5206         /* diagonal portion of A */
5207         for (j=0; j<ncols_d; j++,k++) {
5208           cj[k] = *aj++;
5209           ca[k] = *aa++;
5210         }
5211         /* off-diagonal portion of A */
5212         for (j=0; j<ncols_o; j++,k++) {
5213           cj[k] = dn + *bj++;
5214           ca[k] = *ba++;
5215         }
5216       }
5217       /* put together the new matrix */
5218       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5219       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5220       /* Since these are PETSc arrays, change flags to free them as necessary. */
5221       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5222       c->free_a  = PETSC_TRUE;
5223       c->free_ij = PETSC_TRUE;
5224       c->nonew   = 0;
5225       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5226     } else if (scall == MAT_REUSE_MATRIX) {
5227 #if defined(PETSC_HAVE_DEVICE)
5228       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5229 #endif
5230       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5231       ca = c->a;
5232       for (i=0; i<am; i++) {
5233         const PetscInt ncols_d = ai[i+1] - ai[i];
5234         const PetscInt ncols_o = bi[i+1] - bi[i];
5235         /* diagonal portion of A */
5236         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5237         /* off-diagonal portion of A */
5238         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5239       }
5240     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5241     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5242     ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr);
5243     if (glob) {
5244       PetscInt cst, *gidx;
5245 
5246       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5247       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5248       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5249       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5250       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5251     }
5252   }
5253   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5254   PetscFunctionReturn(0);
5255 }
5256 
5257 /*@C
5258      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5259 
5260     Not Collective
5261 
5262    Input Parameters:
5263 +    A - the matrix
5264 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5265 -    row, col - index sets of rows and columns to extract (or NULL)
5266 
5267    Output Parameter:
5268 .    A_loc - the local sequential matrix generated
5269 
5270     Level: developer
5271 
5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5273 
5274 @*/
5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5276 {
5277   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5278   PetscErrorCode ierr;
5279   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5280   IS             isrowa,iscola;
5281   Mat            *aloc;
5282   PetscBool      match;
5283 
5284   PetscFunctionBegin;
5285   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5286   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5287   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5288   if (!row) {
5289     start = A->rmap->rstart; end = A->rmap->rend;
5290     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5291   } else {
5292     isrowa = *row;
5293   }
5294   if (!col) {
5295     start = A->cmap->rstart;
5296     cmap  = a->garray;
5297     nzA   = a->A->cmap->n;
5298     nzB   = a->B->cmap->n;
5299     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5300     ncols = 0;
5301     for (i=0; i<nzB; i++) {
5302       if (cmap[i] < start) idx[ncols++] = cmap[i];
5303       else break;
5304     }
5305     imark = i;
5306     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5307     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5308     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5309   } else {
5310     iscola = *col;
5311   }
5312   if (scall != MAT_INITIAL_MATRIX) {
5313     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5314     aloc[0] = *A_loc;
5315   }
5316   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5317   if (!col) { /* attach global id of condensed columns */
5318     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5319   }
5320   *A_loc = aloc[0];
5321   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5322   if (!row) {
5323     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5324   }
5325   if (!col) {
5326     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5327   }
5328   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5329   PetscFunctionReturn(0);
5330 }
5331 
5332 /*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once its index is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5335  * on a global size.
5336  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscErrorCode           ierr;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows, local or remote)
   * */
  ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
  ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
  ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
  for (i=0;i<nrows;i++) {
    /* Find the owning rank and the owner-local index for each requested row;
     * the row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag: nonzero count of row i in the diagonal block */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag: nonzero count of row i in the off-diagonal block */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute prefix sums so that we know the relative location of each row's data */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
  ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
  /* 'r' means root, and 'l' means leaf: broadcast per-row (diag,offdiag) counts and offsets to the leaves */
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscFree(roffsets);CHKERRQ(ierr);
  ierr = PetscFree(nrcols);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    /* total nonzeros per requested row, and the maximum over all rows */
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure out the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
  ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
  ierr = PetscFree(pnnz);CHKERRQ(ierr);
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
  /* diag */
  ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
  /* off diag */
  ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* Build one leaf entry per nonzero: diag and offdiag SFs target disjoint
   * positions (ilocal vs oilocal) of the same CSR arrays of P_oth */
  for (i=0;i<nrows;i++) {
    owner = 0;
    ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
  ierr = PetscFree(loffsets);CHKERRQ(ierr);
  ierr = PetscFree(nlcols);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);

  ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
  /* Off diag */
  ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
  /* We operate on the matrix internal data for saving memory; the two broadcasts
   * write disjoint leaf entries of p_oth->a, so both may be in flight at once */
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
  /* Convert to global indices for diag matrix (shifted back again below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* We want P_oth store global indices */
  ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
  /* Use memory scalable approach */
  ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  /* Convert back to local indices so P's own data is left unchanged */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
  nout = 0;
  /* Undo the in-place global translation of po->j; every index must map back */
  ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
  if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
  ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
  /* Exchange values */
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5501 
5502 /*
5503  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5504  * This supports MPIAIJ and MAIJ
5505  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (off-diagonal columns of A, divided by dof for MAIJ) */
    ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
    ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
    ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
      if (!has) {
        mapping[i] = count;
        ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
      } else {
        /* Current 'i' has the same key as the previous step (sortedness), so it maps to the last slot */
        mapping[i] = count-1;
      }
    }
    /* 'map' translates off-diagonal columns of A to rows of P_oth */
    ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
    ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
    if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
    /* Sorted list of the unique keys = rows of P to fetch */
    ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
    off = 0;
    ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
    ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
    ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
    /* In case, the matrix was already created but users want to recreate the matrix */
    ierr = MatDestroy(P_oth);CHKERRQ(ierr);
    ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
    ierr = ISDestroy(&map);CHKERRQ(ierr);
    ierr = ISDestroy(&rows);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     *  */
    ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
    ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
    if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5575 
5576 /*@C
5577     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5585 
   Output Parameters:
5587 +    rowb, colb - index sets of rows and columns of B to extract
5588 -    B_seq - the sequential matrix generated
5589 
5590     Level: developer
5591 
5592 @*/
5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5594 {
5595   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5596   PetscErrorCode ierr;
5597   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5598   IS             isrowb,iscolb;
5599   Mat            *bseq=NULL;
5600 
5601   PetscFunctionBegin;
5602   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5603     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5604   }
5605   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5606 
5607   if (scall == MAT_INITIAL_MATRIX) {
5608     start = A->cmap->rstart;
5609     cmap  = a->garray;
5610     nzA   = a->A->cmap->n;
5611     nzB   = a->B->cmap->n;
5612     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5613     ncols = 0;
5614     for (i=0; i<nzB; i++) {  /* row < local row index */
5615       if (cmap[i] < start) idx[ncols++] = cmap[i];
5616       else break;
5617     }
5618     imark = i;
5619     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5620     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5621     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5622     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5623   } else {
5624     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5625     isrowb  = *rowb; iscolb = *colb;
5626     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5627     bseq[0] = *B_seq;
5628   }
5629   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5630   *B_seq = bseq[0];
5631   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5632   if (!rowb) {
5633     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5634   } else {
5635     *rowb = isrowb;
5636   }
5637   if (!colb) {
5638     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5639   } else {
5640     *colb = iscolb;
5641   }
5642   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5643   PetscFunctionReturn(0);
5644 }
5645 
5646 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A
5649 
5650     Collective on Mat
5651 
5652    Input Parameters:
5653 +    A,B - the matrices in mpiaij format
5654 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5655 
5656    Output Parameter:
5657 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5658 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5659 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5660 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5661 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5664 
5665     Level: developer
5666 
5667 */
5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5669 {
5670   PetscErrorCode         ierr;
5671   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5672   Mat_SeqAIJ             *b_oth;
5673   VecScatter             ctx;
5674   MPI_Comm               comm;
5675   const PetscMPIInt      *rprocs,*sprocs;
5676   const PetscInt         *srow,*rstarts,*sstarts;
5677   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5678   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5679   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5680   MPI_Request            *rwaits = NULL,*swaits = NULL;
5681   MPI_Status             rstatus;
5682   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5683   PETSC_UNUSED PetscMPIInt jj;
5684 
5685   PetscFunctionBegin;
5686   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5687   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5688 
5689   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5690     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5691   }
5692   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5693   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5694 
5695   if (size == 1) {
5696     startsj_s = NULL;
5697     bufa_ptr  = NULL;
5698     *B_oth    = NULL;
5699     PetscFunctionReturn(0);
5700   }
5701 
5702   ctx = a->Mvctx;
5703   tag = ((PetscObject)ctx)->tag;
5704 
5705   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5706   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5707   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5708   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5709   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5710   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5711 
5712   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5713   if (scall == MAT_INITIAL_MATRIX) {
5714     /* i-array */
5715     /*---------*/
5716     /*  post receives */
5717     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5718     for (i=0; i<nrecvs; i++) {
5719       rowlen = rvalues + rstarts[i]*rbs;
5720       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5721       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5722     }
5723 
5724     /* pack the outgoing message */
5725     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5726 
5727     sstartsj[0] = 0;
5728     rstartsj[0] = 0;
5729     len         = 0; /* total length of j or a array to be sent */
5730     if (nsends) {
5731       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5732       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5733     }
5734     for (i=0; i<nsends; i++) {
5735       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5736       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5737       for (j=0; j<nrows; j++) {
5738         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5739         for (l=0; l<sbs; l++) {
5740           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5741 
5742           rowlen[j*sbs+l] = ncols;
5743 
5744           len += ncols;
5745           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5746         }
5747         k++;
5748       }
5749       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5750 
5751       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5752     }
5753     /* recvs and sends of i-array are completed */
5754     i = nrecvs;
5755     while (i--) {
5756       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5757     }
5758     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5759     ierr = PetscFree(svalues);CHKERRQ(ierr);
5760 
5761     /* allocate buffers for sending j and a arrays */
5762     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5763     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5764 
5765     /* create i-array of B_oth */
5766     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5767 
5768     b_othi[0] = 0;
5769     len       = 0; /* total length of j or a array to be received */
5770     k         = 0;
5771     for (i=0; i<nrecvs; i++) {
5772       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5773       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5774       for (j=0; j<nrows; j++) {
5775         b_othi[k+1] = b_othi[k] + rowlen[j];
5776         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5777         k++;
5778       }
5779       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5780     }
5781     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5782 
5783     /* allocate space for j and a arrrays of B_oth */
5784     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5785     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5786 
5787     /* j-array */
5788     /*---------*/
5789     /*  post receives of j-array */
5790     for (i=0; i<nrecvs; i++) {
5791       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5792       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5793     }
5794 
5795     /* pack the outgoing message j-array */
5796     if (nsends) k = sstarts[0];
5797     for (i=0; i<nsends; i++) {
5798       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5799       bufJ  = bufj+sstartsj[i];
5800       for (j=0; j<nrows; j++) {
5801         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5802         for (ll=0; ll<sbs; ll++) {
5803           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5804           for (l=0; l<ncols; l++) {
5805             *bufJ++ = cols[l];
5806           }
5807           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5808         }
5809       }
5810       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5811     }
5812 
5813     /* recvs and sends of j-array are completed */
5814     i = nrecvs;
5815     while (i--) {
5816       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5817     }
5818     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5819   } else if (scall == MAT_REUSE_MATRIX) {
5820     sstartsj = *startsj_s;
5821     rstartsj = *startsj_r;
5822     bufa     = *bufa_ptr;
5823     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5824     b_otha   = b_oth->a;
5825 #if defined(PETSC_HAVE_DEVICE)
5826     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5827 #endif
5828   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5829 
5830   /* a-array */
5831   /*---------*/
5832   /*  post receives of a-array */
5833   for (i=0; i<nrecvs; i++) {
5834     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5835     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5836   }
5837 
5838   /* pack the outgoing message a-array */
5839   if (nsends) k = sstarts[0];
5840   for (i=0; i<nsends; i++) {
5841     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5842     bufA  = bufa+sstartsj[i];
5843     for (j=0; j<nrows; j++) {
5844       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5845       for (ll=0; ll<sbs; ll++) {
5846         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5847         for (l=0; l<ncols; l++) {
5848           *bufA++ = vals[l];
5849         }
5850         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5851       }
5852     }
5853     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5854   }
5855   /* recvs and sends of a-array are completed */
5856   i = nrecvs;
5857   while (i--) {
5858     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5859   }
5860   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5861   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5862 
5863   if (scall == MAT_INITIAL_MATRIX) {
5864     /* put together the new matrix */
5865     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5866 
5867     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5868     /* Since these are PETSc arrays, change flags to free them as necessary. */
5869     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5870     b_oth->free_a  = PETSC_TRUE;
5871     b_oth->free_ij = PETSC_TRUE;
5872     b_oth->nonew   = 0;
5873 
5874     ierr = PetscFree(bufj);CHKERRQ(ierr);
5875     if (!startsj_s || !bufa_ptr) {
5876       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5877       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5878     } else {
5879       *startsj_s = sstartsj;
5880       *startsj_r = rstartsj;
5881       *bufa_ptr  = bufa;
5882     }
5883   }
5884 
5885   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5886   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5887   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5888   PetscFunctionReturn(0);
5889 }
5890 
5891 /*@C
5892   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5893 
5894   Not Collective
5895 
5896   Input Parameters:
5897 . A - The matrix in mpiaij format
5898 
5899   Output Parameter:
5900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5901 . colmap - A map from global column index to local index into lvec
5902 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5903 
5904   Level: developer
5905 
5906 @*/
5907 #if defined(PETSC_USE_CTABLE)
5908 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5909 #else
5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5911 #endif
5912 {
5913   Mat_MPIAIJ *a;
5914 
5915   PetscFunctionBegin;
5916   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5917   PetscValidPointer(lvec, 2);
5918   PetscValidPointer(colmap, 3);
5919   PetscValidPointer(multScatter, 4);
5920   a = (Mat_MPIAIJ*) A->data;
5921   if (lvec) *lvec = a->lvec;
5922   if (colmap) *colmap = a->colmap;
5923   if (multScatter) *multScatter = a->Mvctx;
5924   PetscFunctionReturn(0);
5925 }
5926 
5927 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5930 #if defined(PETSC_HAVE_MKL_SPARSE)
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5932 #endif
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5935 #if defined(PETSC_HAVE_ELEMENTAL)
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5937 #endif
5938 #if defined(PETSC_HAVE_SCALAPACK)
5939 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5940 #endif
5941 #if defined(PETSC_HAVE_HYPRE)
5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 #if defined(PETSC_HAVE_CUDA)
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5946 #endif
5947 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5953 
5954 /*
5955     Computes (B'*A')' since computing B*A directly is untenable
5956 
5957                n                       p                          p
5958         [             ]       [             ]         [                 ]
5959       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5960         [             ]       [             ]         [                 ]
5961 
5962 */
5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5964 {
5965   PetscErrorCode ierr;
5966   Mat            At,Bt,Ct;
5967 
5968   PetscFunctionBegin;
5969   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5970   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5971   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5972   ierr = MatDestroy(&At);CHKERRQ(ierr);
5973   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5974   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5975   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5976   PetscFunctionReturn(0);
5977 }
5978 
5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5980 {
5981   PetscErrorCode ierr;
5982   PetscBool      cisdense;
5983 
5984   PetscFunctionBegin;
5985   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5986   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5987   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5988   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5989   if (!cisdense) {
5990     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5991   }
5992   ierr = MatSetUp(C);CHKERRQ(ierr);
5993 
5994   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 /* ----------------------------------------------------------------*/
5999 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6000 {
6001   Mat_Product *product = C->product;
6002   Mat         A = product->A,B=product->B;
6003 
6004   PetscFunctionBegin;
6005   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6006     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6007 
6008   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6009   C->ops->productsymbolic = MatProductSymbolic_AB;
6010   PetscFunctionReturn(0);
6011 }
6012 
6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6014 {
6015   PetscErrorCode ierr;
6016   Mat_Product    *product = C->product;
6017 
6018   PetscFunctionBegin;
6019   if (product->type == MATPRODUCT_AB) {
6020     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6021   }
6022   PetscFunctionReturn(0);
6023 }
6024 /* ----------------------------------------------------------------*/
6025 
6026 /*MC
6027    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6028 
6029    Options Database Keys:
6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6031 
6032    Level: beginner
6033 
6034    Notes:
6035     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6036     in this case the values associated with the rows and columns one passes in are set to zero
6037     in the matrix
6038 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6041 
6042 .seealso: MatCreateAIJ()
6043 M*/
6044 
/* Constructor for the MATMPIAIJ type: installs the method table, creates the stash
   used to buffer off-process MatSetValues() entries, zeroes the cached communication
   and work objects, and registers the type-specific methods and conversions that are
   looked up by name at runtime. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);

  /* allocate the type-specific data and copy in the full MPIAIJ operations table */
  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  /* lazily-built structures; created on first use (assembly, MatMult, MatGetRow) */
  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* register dynamic methods, found at runtime via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to other formats; device/third-party ones compiled in only when configured */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
6123 
6124 /*@C
6125      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6126          and "off-diagonal" part of the matrix in CSR format.
6127 
6128    Collective
6129 
6130    Input Parameters:
6131 +  comm - MPI communicator
6132 .  m - number of local rows (Cannot be PETSC_DECIDE)
6133 .  n - This value should be the same as the local size used in creating the
6134        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6135        calculated if N is given) For square matrices n is almost always m.
6136 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6137 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6138 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6139 .   j - column indices
6140 .   a - matrix values
6141 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6142 .   oj - column indices
6143 -   oa - matrix values
6144 
6145    Output Parameter:
6146 .   mat - the matrix
6147 
6148    Level: advanced
6149 
6150    Notes:
6151        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6152        must free the arrays once the matrix has been destroyed and not before.
6153 
6154        The i and j indices are 0 based
6155 
6156        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6157 
6158        This sets local rows and cannot be used to set off-processor values.
6159 
6160        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6161        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6162        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6163        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6164        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6165        communication if it is known that only local entries will be set.
6166 
6167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6168           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6169 @*/
6170 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6171 {
6172   PetscErrorCode ierr;
6173   Mat_MPIAIJ     *maij;
6174 
6175   PetscFunctionBegin;
6176   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6177   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6178   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6179   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6180   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6181   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6182   maij = (Mat_MPIAIJ*) (*mat)->data;
6183 
6184   (*mat)->preallocated = PETSC_TRUE;
6185 
6186   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6187   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6188 
6189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6191 
6192   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6193   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6194   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6196 
6197   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6198   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6200   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6201   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6202   PetscFunctionReturn(0);
6203 }
6204 
6205 /*
6206     Special version for direct calls from Fortran
6207 */
6208 #include <petsc/private/fortranimpl.h>
6209 
6210 /* Change these macros so can be used in void function */
6211 #undef CHKERRQ
6212 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6213 #undef SETERRQ2
6214 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6215 #undef SETERRQ3
6216 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6217 #undef SETERRQ
6218 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6219 
6220 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6221 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6222 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6223 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6224 #else
6225 #endif
6226 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6227 {
6228   Mat            mat  = *mmat;
6229   PetscInt       m    = *mm, n = *mn;
6230   InsertMode     addv = *maddv;
6231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6232   PetscScalar    value;
6233   PetscErrorCode ierr;
6234 
6235   MatCheckPreallocated(mat,1);
6236   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6237   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6238   {
6239     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6240     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6241     PetscBool roworiented = aij->roworiented;
6242 
6243     /* Some Variables required in the macro */
6244     Mat        A                    = aij->A;
6245     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6246     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6247     MatScalar  *aa                  = a->a;
6248     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6249     Mat        B                    = aij->B;
6250     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6251     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6252     MatScalar  *ba                  = b->a;
6253     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6254      * cannot use "#if defined" inside a macro. */
6255     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6256 
6257     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6258     PetscInt  nonew = a->nonew;
6259     MatScalar *ap1,*ap2;
6260 
6261     PetscFunctionBegin;
6262     for (i=0; i<m; i++) {
6263       if (im[i] < 0) continue;
6264       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6265       if (im[i] >= rstart && im[i] < rend) {
6266         row      = im[i] - rstart;
6267         lastcol1 = -1;
6268         rp1      = aj + ai[row];
6269         ap1      = aa + ai[row];
6270         rmax1    = aimax[row];
6271         nrow1    = ailen[row];
6272         low1     = 0;
6273         high1    = nrow1;
6274         lastcol2 = -1;
6275         rp2      = bj + bi[row];
6276         ap2      = ba + bi[row];
6277         rmax2    = bimax[row];
6278         nrow2    = bilen[row];
6279         low2     = 0;
6280         high2    = nrow2;
6281 
6282         for (j=0; j<n; j++) {
6283           if (roworiented) value = v[i*n+j];
6284           else value = v[i+j*m];
6285           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6286           if (in[j] >= cstart && in[j] < cend) {
6287             col = in[j] - cstart;
6288             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6289 #if defined(PETSC_HAVE_DEVICE)
6290             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6291 #endif
6292           } else if (in[j] < 0) continue;
6293           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6294             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6295             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6296           } else {
6297             if (mat->was_assembled) {
6298               if (!aij->colmap) {
6299                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6300               }
6301 #if defined(PETSC_USE_CTABLE)
6302               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6303               col--;
6304 #else
6305               col = aij->colmap[in[j]] - 1;
6306 #endif
6307               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6308                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6309                 col  =  in[j];
6310                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6311                 B        = aij->B;
6312                 b        = (Mat_SeqAIJ*)B->data;
6313                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6314                 rp2      = bj + bi[row];
6315                 ap2      = ba + bi[row];
6316                 rmax2    = bimax[row];
6317                 nrow2    = bilen[row];
6318                 low2     = 0;
6319                 high2    = nrow2;
6320                 bm       = aij->B->rmap->n;
6321                 ba       = b->a;
6322                 inserted = PETSC_FALSE;
6323               }
6324             } else col = in[j];
6325             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6326 #if defined(PETSC_HAVE_DEVICE)
6327             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6328 #endif
6329           }
6330         }
6331       } else if (!aij->donotstash) {
6332         if (roworiented) {
6333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6334         } else {
6335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6336         }
6337       }
6338     }
6339   }
6340   PetscFunctionReturnVoid();
6341 }
6342 
/* Per-product data for the backend (device-capable) implementation of MPIAIJ
   matrix products (AB, AtB, PtAP). The global product is assembled from a set
   of sequential intermediate products; their values are gathered and inserted
   into the result matrix through COO assembly (see
   MatProductNumeric_MPIAIJBACKEND / MatProductSymbolic_MPIAIJBACKEND). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r; /* send/receive offsets reused on MAT_REUSE_MATRIX calls */
  PetscScalar *bufa;                 /* communication buffer */
  Mat         P_oth;                 /* off-process rows of P needed locally */

  /* may take advantage of merging product->B */
  Mat Bloc; /* local (diag+offdiag merged) part of product->B */

  /* cusparse does not have support to split between symbolic and numeric phases
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w;        /* coo_v: values for local insertion; coo_w: staging for off-process values */
  PetscInt     **own;                /* per-product index lists into the aij arrays: locally owned entries */
  PetscInt     **off;                /* per-product index lists into the aij arrays: off-process entries */
  PetscBool    hasoffproc;           /* if true, non-local values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;                   /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;                /* memory type used for coo_v/coo_w (host or device) */

  /* customization */
  PetscBool abmerge;                 /* for AB: multiply A_diag against the merged local B */
  PetscBool P_oth_bind;              /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;
6373 
6374 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6375 {
6376   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6377   PetscInt            i;
6378   PetscErrorCode      ierr;
6379 
6380   PetscFunctionBegin;
6381   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6382   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6383   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6384   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6385   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6386   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6387   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6388   for (i = 0; i < mmdata->cp; i++) {
6389     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6390   }
6391   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6392   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6393   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6397   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6398   PetscFunctionReturn(0);
6399 }
6400 
6401 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6402 {
6403   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6404   PetscErrorCode ierr;
6405 
6406   PetscFunctionBegin;
6407   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6408   if (f) {
6409     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6410   } else {
6411     const PetscScalar *vv;
6412 
6413     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6414     if (n && idx) {
6415       PetscScalar    *w = v;
6416       const PetscInt *oi = idx;
6417       PetscInt       j;
6418 
6419       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6420     } else {
6421       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6422     }
6423     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6424   }
6425   PetscFunctionReturn(0);
6426 }
6427 
6428 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6429 {
6430   MatMatMPIAIJBACKEND *mmdata;
6431   PetscInt            i,n_d,n_o;
6432   PetscErrorCode      ierr;
6433 
6434   PetscFunctionBegin;
6435   MatCheckProduct(C,1);
6436   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6437   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6438   if (!mmdata->reusesym) { /* update temporary matrices */
6439     if (mmdata->P_oth) {
6440       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6441     }
6442     if (mmdata->Bloc) {
6443       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6444     }
6445   }
6446   mmdata->reusesym = PETSC_FALSE;
6447 
6448   for (i = 0; i < mmdata->cp; i++) {
6449     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6450     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6451   }
6452   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6453     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6454 
6455     if (mmdata->mptmp[i]) continue;
6456     if (noff) {
6457       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6458 
6459       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6460       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6461       n_o += noff;
6462       n_d += nown;
6463     } else {
6464       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6465 
6466       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6467       n_d += mm->nz;
6468     }
6469   }
6470   if (mmdata->hasoffproc) { /* offprocess insertion */
6471     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6472     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6473   }
6474   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6475   PetscFunctionReturn(0);
6476 }
6477 
6478 /* Support for Pt * A, A * P, or Pt * A * P */
6479 #define MAX_NUMBER_INTERMEDIATE 4
6480 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6481 {
6482   Mat_Product            *product = C->product;
6483   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6484   Mat_MPIAIJ             *a,*p;
6485   MatMatMPIAIJBACKEND    *mmdata;
6486   ISLocalToGlobalMapping P_oth_l2g = NULL;
6487   IS                     glob = NULL;
6488   const char             *prefix;
6489   char                   pprefix[256];
6490   const PetscInt         *globidx,*P_oth_idx;
6491   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6492   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6493   MatProductType         ptype;
6494   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6495   PetscMPIInt            size;
6496   PetscErrorCode         ierr;
6497 
6498   PetscFunctionBegin;
6499   MatCheckProduct(C,1);
6500   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6501   ptype = product->type;
6502   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6503   switch (ptype) {
6504   case MATPRODUCT_AB:
6505     A = product->A;
6506     P = product->B;
6507     m = A->rmap->n;
6508     n = P->cmap->n;
6509     M = A->rmap->N;
6510     N = P->cmap->N;
6511     break;
6512   case MATPRODUCT_AtB:
6513     P = product->A;
6514     A = product->B;
6515     m = P->cmap->n;
6516     n = A->cmap->n;
6517     M = P->cmap->N;
6518     N = A->cmap->N;
6519     hasoffproc = PETSC_TRUE;
6520     break;
6521   case MATPRODUCT_PtAP:
6522     A = product->A;
6523     P = product->B;
6524     m = P->cmap->n;
6525     n = P->cmap->n;
6526     M = P->cmap->N;
6527     N = P->cmap->N;
6528     hasoffproc = PETSC_TRUE;
6529     break;
6530   default:
6531     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6532   }
6533   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6534   if (size == 1) hasoffproc = PETSC_FALSE;
6535 
6536   /* defaults */
6537   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6538     mp[i]    = NULL;
6539     mptmp[i] = PETSC_FALSE;
6540     rmapt[i] = -1;
6541     cmapt[i] = -1;
6542     rmapa[i] = NULL;
6543     cmapa[i] = NULL;
6544   }
6545 
6546   /* customization */
6547   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6548   mmdata->reusesym = product->api_user;
6549   if (ptype == MATPRODUCT_AB) {
6550     if (product->api_user) {
6551       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6552       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6553       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6554       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6555     } else {
6556       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6557       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6558       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6559       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6560     }
6561   } else if (ptype == MATPRODUCT_PtAP) {
6562     if (product->api_user) {
6563       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6564       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6565       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6566     } else {
6567       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6568       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6569       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6570     }
6571   }
6572   a = (Mat_MPIAIJ*)A->data;
6573   p = (Mat_MPIAIJ*)P->data;
6574   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6575   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6576   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6577   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6578   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6579   switch (ptype) {
6580   case MATPRODUCT_AB: /* A * P */
6581     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6582 
6583     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6584       /* P is product->B */
6585       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6586       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6587       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6588       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6589       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6590       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6591       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6592       mp[cp]->product->api_user = product->api_user;
6593       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6594       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6595       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6596       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6597       rmapt[cp] = 1;
6598       cmapt[cp] = 2;
6599       cmapa[cp] = globidx;
6600       mptmp[cp] = PETSC_FALSE;
6601       cp++;
6602     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6603       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6604       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6605       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6606       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6607       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6608       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6609       mp[cp]->product->api_user = product->api_user;
6610       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6611       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6612       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6613       rmapt[cp] = 1;
6614       cmapt[cp] = 1;
6615       mptmp[cp] = PETSC_FALSE;
6616       cp++;
6617       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6618       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6619       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6620       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6621       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6622       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6623       mp[cp]->product->api_user = product->api_user;
6624       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6625       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6626       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6627       rmapt[cp] = 1;
6628       cmapt[cp] = 2;
6629       cmapa[cp] = p->garray;
6630       mptmp[cp] = PETSC_FALSE;
6631       cp++;
6632     }
6633     if (mmdata->P_oth) {
6634       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6635       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6636       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6637       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6638       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6639       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6640       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6641       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6642       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6643       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6644       mp[cp]->product->api_user = product->api_user;
6645       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6646       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6647       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6648       rmapt[cp] = 1;
6649       cmapt[cp] = 2;
6650       cmapa[cp] = P_oth_idx;
6651       mptmp[cp] = PETSC_FALSE;
6652       cp++;
6653     }
6654     break;
6655   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
6656     /* A is product->B */
6657     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6658     if (A == P) {
6659       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6660       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6661       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6662       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6663       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6664       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6665       mp[cp]->product->api_user = product->api_user;
6666       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6667       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6668       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6669       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6670       rmapt[cp] = 2;
6671       rmapa[cp] = globidx;
6672       cmapt[cp] = 2;
6673       cmapa[cp] = globidx;
6674       mptmp[cp] = PETSC_FALSE;
6675       cp++;
6676     } else {
6677       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6678       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6679       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6680       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6681       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6682       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6683       mp[cp]->product->api_user = product->api_user;
6684       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6685       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6686       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6687       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6688       rmapt[cp] = 1;
6689       cmapt[cp] = 2;
6690       cmapa[cp] = globidx;
6691       mptmp[cp] = PETSC_FALSE;
6692       cp++;
6693       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6694       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6695       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6696       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6697       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6698       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6699       mp[cp]->product->api_user = product->api_user;
6700       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6701       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6702       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6703       rmapt[cp] = 2;
6704       rmapa[cp] = p->garray;
6705       cmapt[cp] = 2;
6706       cmapa[cp] = globidx;
6707       mptmp[cp] = PETSC_FALSE;
6708       cp++;
6709     }
6710     break;
6711   case MATPRODUCT_PtAP:
6712     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6713     /* P is product->B */
6714     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6715     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6716     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6717     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6718     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6719     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6720     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6721     mp[cp]->product->api_user = product->api_user;
6722     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6723     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6724     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6725     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6726     rmapt[cp] = 2;
6727     rmapa[cp] = globidx;
6728     cmapt[cp] = 2;
6729     cmapa[cp] = globidx;
6730     mptmp[cp] = PETSC_FALSE;
6731     cp++;
6732     if (mmdata->P_oth) {
6733       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6734       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6735       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6736       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6737       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6738       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6739       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6740       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6741       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6742       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6743       mp[cp]->product->api_user = product->api_user;
6744       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6745       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6746       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6747       mptmp[cp] = PETSC_TRUE;
6748       cp++;
6749       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6750       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6751       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6752       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6753       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6754       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6755       mp[cp]->product->api_user = product->api_user;
6756       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6757       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6758       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6759       rmapt[cp] = 2;
6760       rmapa[cp] = globidx;
6761       cmapt[cp] = 2;
6762       cmapa[cp] = P_oth_idx;
6763       mptmp[cp] = PETSC_FALSE;
6764       cp++;
6765     }
6766     break;
6767   default:
6768     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6769   }
6770   /* sanity check */
6771   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6772 
6773   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6774   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6775   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6776   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6777   mmdata->cp = cp;
6778   C->product->data       = mmdata;
6779   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6780   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6781 
6782   /* memory type */
6783   mmdata->mtype = PETSC_MEMTYPE_HOST;
6784   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6785   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6786   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6787   // enable the line below MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6788   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6789 
6790   /* prepare coo coordinates for values insertion */
6791   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6792     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6793     if (mptmp[cp]) continue;
6794     if (rmapt[cp] == 2 && hasoffproc) {
6795       const PetscInt *rmap = rmapa[cp];
6796       const PetscInt mr = mp[cp]->rmap->n;
6797       const PetscInt rs = C->rmap->rstart;
6798       const PetscInt re = C->rmap->rend;
6799       const PetscInt *ii  = mm->i;
6800       for (i = 0; i < mr; i++) {
6801         const PetscInt gr = rmap[i];
6802         const PetscInt nz = ii[i+1] - ii[i];
6803         if (gr < rs || gr >= re) ncoo_o += nz;
6804         else ncoo_oown += nz;
6805       }
6806     } else ncoo_d += mm->nz;
6807   }
6808   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6809   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6810   if (hasoffproc) { /* handle offproc values insertion */
6811     PetscSF  msf;
6812     PetscInt ncoo2,*coo_i2,*coo_j2;
6813 
6814     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6815     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6816     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6817     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6818       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6819       PetscInt   *idxoff = mmdata->off[cp];
6820       PetscInt   *idxown = mmdata->own[cp];
6821       if (!mptmp[cp] && rmapt[cp] == 2) {
6822         const PetscInt *rmap = rmapa[cp];
6823         const PetscInt *cmap = cmapa[cp];
6824         const PetscInt *ii  = mm->i;
6825         PetscInt       *coi = coo_i + ncoo_o;
6826         PetscInt       *coj = coo_j + ncoo_o;
6827         const PetscInt mr = mp[cp]->rmap->n;
6828         const PetscInt rs = C->rmap->rstart;
6829         const PetscInt re = C->rmap->rend;
6830         const PetscInt cs = C->cmap->rstart;
6831         for (i = 0; i < mr; i++) {
6832           const PetscInt *jj = mm->j + ii[i];
6833           const PetscInt gr  = rmap[i];
6834           const PetscInt nz  = ii[i+1] - ii[i];
6835           if (gr < rs || gr >= re) {
6836             for (j = ii[i]; j < ii[i+1]; j++) {
6837               *coi++ = gr;
6838               *idxoff++ = j;
6839             }
6840             if (!cmapt[cp]) { /* already global */
6841               for (j = 0; j < nz; j++) *coj++ = jj[j];
6842             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6843               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6844             } else { /* offdiag */
6845               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6846             }
6847             ncoo_o += nz;
6848           } else {
6849             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6850           }
6851         }
6852       }
6853       mmdata->off[cp + 1] = idxoff;
6854       mmdata->own[cp + 1] = idxown;
6855     }
6856 
6857     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6858     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6859     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6860     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6861     ncoo = ncoo_d + ncoo_oown + ncoo2;
6862     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6863     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6864     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6865     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6866     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6867     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6868     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6869     coo_i = coo_i2;
6870     coo_j = coo_j2;
6871   } else { /* no offproc values insertion */
6872     ncoo = ncoo_d;
6873     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6874 
6875     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6876     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6877     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6878   }
6879   mmdata->hasoffproc = hasoffproc;
6880 
6881   /* on-process indices */
6882   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6883     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6884     PetscInt       *coi = coo_i + ncoo_d;
6885     PetscInt       *coj = coo_j + ncoo_d;
6886     const PetscInt *jj  = mm->j;
6887     const PetscInt *ii  = mm->i;
6888     const PetscInt *cmap = cmapa[cp];
6889     const PetscInt *rmap = rmapa[cp];
6890     const PetscInt mr = mp[cp]->rmap->n;
6891     const PetscInt rs = C->rmap->rstart;
6892     const PetscInt re = C->rmap->rend;
6893     const PetscInt cs = C->cmap->rstart;
6894 
6895     if (mptmp[cp]) continue;
6896     if (rmapt[cp] == 1) {
6897       for (i = 0; i < mr; i++) {
6898         const PetscInt gr = i + rs;
6899         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6900       }
6901       /* columns coo */
6902       if (!cmapt[cp]) {
6903         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6904       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6905         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
6906       } else { /* offdiag */
6907         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6908       }
6909       ncoo_d += mm->nz;
6910     } else if (rmapt[cp] == 2) {
6911       for (i = 0; i < mr; i++) {
6912         const PetscInt *jj = mm->j + ii[i];
6913         const PetscInt gr  = rmap[i];
6914         const PetscInt nz  = ii[i+1] - ii[i];
6915         if (gr >= rs && gr < re) {
6916           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6917           if (!cmapt[cp]) { /* already global */
6918             for (j = 0; j < nz; j++) *coj++ = jj[j];
6919           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6920             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6921           } else { /* offdiag */
6922             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6923           }
6924           ncoo_d += nz;
6925         }
6926       }
6927     }
6928   }
6929   if (glob) {
6930     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6931   }
6932   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6933   if (P_oth_l2g) {
6934     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6935   }
6936   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6937   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6938 
6939   /* preallocate with COO data */
6940   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6941   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6942   PetscFunctionReturn(0);
6943 }
6944 
6945 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6946 {
6947   Mat_Product    *product = mat->product;
6948   PetscErrorCode ierr;
6949 #if defined(PETSC_HAVE_DEVICE)
6950   PetscBool      match = PETSC_FALSE;
6951   PetscBool      usecpu = PETSC_FALSE;
6952 #else
6953   PetscBool      match = PETSC_TRUE;
6954 #endif
6955 
6956   PetscFunctionBegin;
6957   MatCheckProduct(mat,1);
6958 #if defined(PETSC_HAVE_DEVICE)
6959   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6960     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6961   }
6962   if (match) { /* we can always fallback to CPU in case an operation is not performing on the device */
6963     switch (product->type) {
6964     case MATPRODUCT_AB:
6965       if (product->api_user) {
6966         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6967         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6968         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6969       } else {
6970         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6971         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6972         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6973       }
6974       break;
6975     case MATPRODUCT_AtB:
6976       if (product->api_user) {
6977         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
6978         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6979         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6980       } else {
6981         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
6982         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6983         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6984       }
6985       break;
6986     case MATPRODUCT_PtAP:
6987       if (product->api_user) {
6988         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6989         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6990         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6991       } else {
6992         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6993         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6994         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6995       }
6996       break;
6997     default:
6998       break;
6999     }
7000     match = (PetscBool)!usecpu;
7001   }
7002 #endif
7003   if (match) {
7004     switch (product->type) {
7005     case MATPRODUCT_AB:
7006     case MATPRODUCT_AtB:
7007     case MATPRODUCT_PtAP:
7008       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7009       break;
7010     default:
7011       break;
7012     }
7013   }
7014   /* fallback to MPIAIJ ops */
7015   if (!mat->ops->productsymbolic) {
7016     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7017   }
7018   PetscFunctionReturn(0);
7019 }
7020